diff -u -r --new-file linux/Documentation/Configure.help v2.4.0-test8/linux/Documentation/Configure.help --- linux/Documentation/Configure.help Mon Sep 11 15:22:05 2000 +++ v2.4.0-test8/linux/Documentation/Configure.help Mon Sep 11 05:21:42 2000 @@ -10457,6 +10457,31 @@ called minix.o. Note that the file system of your root partition (the one containing the directory /) cannot be compiled as a module. +Reiserfs support +CONFIG_REISERFS_FS + New, faster, space saving filesystem, based on a balanced tree algorithm. + Uses journaling, and includes a filesystem resizer. You can use reiserfs + in all cases where you use the ext2fs file system. It has fewer worst + case performance situations than other file systems. It is more easily + extended to have features currently found in database and keyword search + systems than block allocation based filesystems are. The next version + will be so extended, and will support plugins consistent with our motto + ``It takes more than a license to make source code open.'' + Mount using the -notail option if you care about performance more than + space. Read http://www.devlinux.com/namesys to learn more about reiserfs. + Sponsored by SuSE, MP3.com, and ecila.org. Have fun. + +CONFIG_REISERFS_CHECK + If you set this to yes, then ReiserFS will perform every check it + can possibly imagine of its internal consistency throughout its + operation. It will also go substantially slower. More than once we + have forgotten that this was on, and then gone despondent over the + latest benchmarks.:-) Use of this option allows our team to go all + out in checking for consistency when debugging without fear of its + effect on end users. If you are on the verge of sending in a bug + report, say yes and you might get a useful error message. Almost + everyone should say no. 
+ Second extended fs support CONFIG_EXT2_FS This is the de facto standard Linux file system (method to organize diff -u -r --new-file linux/fs/Config.in v2.4.0-test8/linux/fs/Config.in --- linux/fs/Config.in Tue Aug 29 18:17:54 2000 +++ v2.4.0-test8/linux/fs/Config.in Fri Aug 25 18:24:29 2000 @@ -8,6 +8,8 @@ tristate 'Kernel automounter support' CONFIG_AUTOFS_FS tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS +dep_tristate 'Reiserfs support' CONFIG_REISERFS_FS $CONFIG_EXPERIMENTAL +dep_mbool ' Have reiserfs do extra internal checking' CONFIG_REISERFS_CHECK $CONFIG_REISERFS_FS $CONFIG_EXPERIMENTAL dep_tristate 'ADFS file system support' CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL dep_mbool ' ADFS write support (DANGEROUS)' CONFIG_ADFS_FS_RW $CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL diff -u -r --new-file linux/fs/inode.c v2.4.0-test8/linux/fs/inode.c --- linux/fs/inode.c Tue Aug 29 18:17:54 2000 +++ v2.4.0-test8/linux/fs/inode.c Fri Aug 25 18:24:29 2000 @@ -125,7 +125,10 @@ void __mark_inode_dirty(struct inode *inode) { struct super_block * sb = inode->i_sb; - + if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->dirty_inode){ + inode->i_sb->s_op->dirty_inode(inode) ; + return ; + } if (sb) { spin_lock(&inode_lock); if (!(inode->i_state & I_DIRTY)) { @@ -594,7 +597,17 @@ spin_unlock(&inode_lock); clean_inode(inode); - sb->s_op->read_inode(inode); + + /* reiserfs specific hack right here. We don't + ** want this to last, and are looking for VFS changes + ** that will allow us to get rid of it. + ** -- mason@suse.com + */ + if (sb->s_op->read_inode2) { + sb->s_op->read_inode2(inode, opaque) ; + } else { + sb->s_op->read_inode(inode); + } /* * This is special! 
We do not need the spinlock diff -u -r --new-file linux/fs/reiserfs/Makefile v2.4.0-test8/linux/fs/reiserfs/Makefile --- linux/fs/reiserfs/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/Makefile Fri Aug 25 18:24:29 2000 @@ -0,0 +1,20 @@ +# +# Makefile for the linux reiser-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +O_TARGET := reiserfs.o +O_OBJS := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o super.o prints.o objectid.o \ +lbalance.o ibalance.o stree.o hashes.o buffer2.o tail_conversion.o journal.o resize.o tail_conversion.o version.o item_ops.o ioctl.o + +M_OBJS := $(O_TARGET) + +include $(TOPDIR)/Rules.make + +TAGS: + etags *.c + diff -u -r --new-file linux/fs/reiserfs/README v2.4.0-test8/linux/fs/reiserfs/README --- linux/fs/reiserfs/README Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/README Sun May 14 23:37:04 2000 @@ -0,0 +1,124 @@ +Reiserfs is a file system based on balanced tree algorithms, which is described at http://devlinux.com/namesys. + +Stop reading here. Go there, then return. + +Send bug reports to reiser@idiom.com, or vs@namesys.botik.ru, or both. + +mkreiserfs and other utilities are in reiserfs/utils, or wherever your +Linux provider put them. Note that some of the utilities cannot be +compiled without accessing to the balancing code which is in the kernel +code, and relocating the utilities may require you to specify where that +code can be found. + +Try turning REISERFS_CHECK in reiserfs_fs.h on or off if you want error +checking or speed, respectively. Real users, as opposed to folks who +want to hack, will want it off. + +[LICENSE] ReiserFS is hereby licensed under the GNU General +Public License version 2. 
Please see the file "COPYING" +which should have accompanied this software distribution for +details of that license. + +Since that license (particularly 2.b) is necessarily vague in certain +areas due to its generality, the following interpretations shall govern. +Some may consider these terms to be a supplemental license to the GPL. +You may include ReiserFS in a Linux kernel which you may then include +with anything, and you may even include it with a Linux kernel with +non-GPL'd kernel modules. You may include it in any kernel which is +wholly GPL'd including its kernel modules which you may then include +with anything. If you wish to use it for a kernel which you sell usage +or copying licenses for, which is not listed above, then you must obtain +an additional license. If you wish to integrate it with any other +software system which is not GPL'd, without integrating it into an +operating system kernel, then you must obtain an additional license. +This is an interpretation of what is and is not part of the software +program falling under the GPL section 2.b., and is intended as a +specification of (with a slight supplement to), not an exception to, the +GPL as applied to this particular piece of software. + +Further licensing options are available for commercial and/or other +interests directly from Hans Reiser: reiser@idiom.com. If you +interpret the GPL as not allowing those additional licensing options, +you read it wrongly, when carefully read you can see that those +restrictions on additional terms do not apply to the owner of the +copyright, and my interpretation of this shall govern for this license. + +[END LICENSE] + +I try to find ways to make money for those who contribute to this code. +My success in such efforts cannot be guaranteed though:-). You are +encouraged to add to it. 
If you ask me to include it in my standard +distribution, and you don't request an exception to this (if you want +the exception, talk to me about it, don't be shy), you give ownership of +the copyright to me, and you consent to my marketing exceptions to the +GPL. If you want to market it as part of a system you sell, please +contact me. + +Hideous Commercial Pitch: Spread your development costs across other OS +vendors. Select from the best in the world, not the best in your +building, by buying from third party OS component suppliers. Leverage +the software component development power of the internet. Be the most +aggressive in taking advantage of the commercial possibilities of +decentralized internet development, and add value through your branded +integration that you sell as an operating system. Let your competitors +be the ones to compete against the entire internet by themselves. Be +hip, get with the new economic trend, before your competitors do. Send +email to reiser@idiom.com. + +To understand the code, after reading the website, start reading the +code by reading reiserfs_fs.h first. + +Hans Reiser was the project initiator, primary architect, source of all +funding for the first 5.5 years, and one of the programmers. He owns +the copyright. + +Vladimir Saveljev was one of the programmers, and he worked long hours +writing the cleanest code. He always made the effort to be the best he +could be, and to make his code the best that it could be. What resulted +was quite remarkable. I don't think that money can ever motivate someone +to work the way he did, he is one of the most selfless men I know. + +Yura joined us near the end of our project, and helped with such things +as integrating hashing back into the code, and benchmarking different +pieces of code to help with tuning analysis. + +Anatoly Pinchuk is a former member of our team who worked closely with +Vladimir throughout the project's development. 
He wrote a quite +substantial portion of the total code. He realized that there was a +space problem with packing tails of files for files larger than a node +that start on a node aligned boundary (there are reasons to want to node +align files), and he invented and implemented indirect items and +unformatted nodes as the solution. + +Two other former employees were involved who won't be getting credit +here because they tried to kill the project at the end of it, and almost +succeeded (they cost me maybe two years). They wanted to force me to +sell it to the company they tried to start. They get to keep the money +they got from me, and that is it. When their "VC" said that he could +get a hundred researchers to swear in Russian Court that I had had +nothing to do with the development of treefs, I changed the name to +reiserfs and registered the copyright. + +Grigory Zaigralin was an extremely effective system administrator for our group. + +Igor Krasheninnikov was wonderful at hardware procurement, repair, and +network installation. + +Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a +textbook he got the algorithm from in the code. Note that his analysis +of how we could use the hashing code in making 32 bit NFS cookies work +was probably more important than the actual algorithm. Colin Plumb also +contributed to it. + +Chris Mason dived right into our code, and in just a few months produced +the journaling code that dramatically increased the value of ReiserFS. +He is just an amazing programmer. + +Igor Zagorovsky is writing much of the new item handler and extent code +for our next major release. + +Ecila, MP3, and SuSE (in chronological order) made it possible for me to +not have a day job anymore, and to dramatically increase our staffing. +Ecila funded hypertext feature development, MP3 funded journaling, and +SuSE funded core development. SuSE has helped in much more than just +funding.... 
diff -u -r --new-file linux/fs/reiserfs/bitmap.c v2.4.0-test8/linux/fs/reiserfs/bitmap.c --- linux/fs/reiserfs/bitmap.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/bitmap.c Mon Sep 18 17:01:19 2000 @@ -0,0 +1,630 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +// +// Ext2's preallocation idea was used for current reiserfs preallocation +// preallocation +// + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> +#include <linux/locks.h> +#include <asm/bitops.h> + +#else + +#include "nokernel.h" + +#endif + + +#ifdef CONFIG_REISERFS_CHECK + +/* this is a safety check to make sure +** blocks are reused properly. +** +** this checks, that block can be reused, and it has correct state +** (free or busy) +*/ +int is_reusable (struct super_block * s, unsigned long block, int bit_value) +{ + int i, j; + + if (block == 0 || block >= SB_BLOCK_COUNT (s)) { + reiserfs_warning ("vs-4010: is_reusable: block number is out of range %lu (%u)\n", + block, SB_BLOCK_COUNT (s)); + return 0; + } + + /* it can't be one of the bitmap blocks */ + for (i = 0; i < SB_BMAP_NR (s); i ++) + if (block == SB_AP_BITMAP (s)[i]->b_blocknr) { + reiserfs_warning ("vs: 4020: is_reusable: " + "bitmap block %lu(%u) can't be freed or reused\n", + block, SB_BMAP_NR (s)); + return 0; + } + + i = block / (s->s_blocksize << 3); + if (i >= SB_BMAP_NR (s)) { + reiserfs_warning ("vs-4030: is_reusable: there is no so many bitmap blocks: " + "block=%lu, bitmap_nr=%d\n", block, i); + return 0; + } + + j = block % (s->s_blocksize << 3); + if ((bit_value == 0 && + reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i]->b_data)) || + (bit_value == 1 && + reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data) == 0)) { + reiserfs_warning ("vs-4040: is_reusable: corresponding bit of block %lu does not " + "match required value (i==%d, j==%d) test_bit==%d\n", + block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)); + 
return 0; + } + + if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { + reiserfs_warning ("vs-4050: is_reusable: this is root block (%u), " + "it must be busy", SB_ROOT_BLOCK (s)); + return 0; + } + + return 1; +} + + + + +#endif /* CONFIG_REISERFS_CHECK */ + +#if 0 +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +int is_used (struct super_block * s, unsigned long block) +{ + int i, j; + + i = block / (s->s_blocksize << 3); + j = block % (s->s_blocksize << 3); + if (reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data)) + return 1; + return 0; + +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +#endif + + +/* get address of corresponding bit (bitmap block number and offset in it) */ +static inline void get_bit_address (struct super_block * s, unsigned long block, int * bmap_nr, int * offset) +{ + /* It is in the bitmap block number equal to the block number divided by the number of + bits in a block. */ + *bmap_nr = block / (s->s_blocksize << 3); + /* Within that bitmap block it is located at bit offset *offset. */ + *offset = block % (s->s_blocksize << 3); + return; +} + + +/* There would be a modest performance benefit if we write a version + to free a list of blocks at once. 
-Hans */ +void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long block) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs; + struct buffer_head * sbh; + struct buffer_head ** apbh; + int nr, offset; + +#ifdef CONFIG_REISERFS_CHECK + if (!s) + reiserfs_panic (s, "vs-4060: reiserfs_free_block: trying to free block on nonexistent device"); + + if (is_reusable (s, block, 1) == 0) + reiserfs_panic (s, "vs-4070: reiserfs_free_block: can not free such block"); +#endif + + rs = SB_DISK_SUPER_BLOCK (s); + sbh = SB_BUFFER_WITH_SB (s); + apbh = SB_AP_BITMAP (s); + + get_bit_address (s, block, &nr, &offset); + + /* mark it before we clear it, just in case */ + journal_mark_freed(th, s, block) ; + + reiserfs_prepare_for_journal(s, apbh[nr], 1 ) ; + + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit (offset, apbh[nr]->b_data)) { + reiserfs_warning ("vs-4080: reiserfs_free_block: " + "free_block (%04x:%lu)[dev:blocknr]: bit already cleared\n", + s->s_dev, block); + } + journal_mark_dirty (th, s, apbh[nr]); + + reiserfs_prepare_for_journal(s, sbh, 1) ; + /* update super block */ + rs->s_free_blocks = cpu_to_le32 (le32_to_cpu (rs->s_free_blocks) + 1); + + journal_mark_dirty (th, s, sbh); + s->s_dirt = 1; +} + + + +/* beginning from offset-th bit in bmap_nr-th bitmap block, + find_forward finds the closest zero bit. It returns 1 and zero + bit address (bitmap, offset) if zero bit found or 0 if there is no + zero bit in the forward direction */ +/* The function is NOT SCHEDULE-SAFE! 
*/ +static int find_forward (struct super_block * s, int * bmap_nr, int * offset, int for_unformatted) +{ + int i, j; + struct buffer_head * bh; + unsigned long block_to_try = 0; + unsigned long next_block_to_try = 0 ; + + for (i = *bmap_nr; i < SB_BMAP_NR (s); i ++, *offset = 0) { + /* get corresponding bitmap block */ + bh = SB_AP_BITMAP (s)[i]; + if (buffer_locked (bh)) { + __wait_on_buffer (bh); + } +retry: + j = reiserfs_find_next_zero_le_bit ((unsigned long *)bh->b_data, + s->s_blocksize << 3, *offset); + + /* wow, this really needs to be redone. We can't allocate a block if + ** it is in the journal somehow. reiserfs_in_journal makes a suggestion + ** for a good block if the one you ask for is in the journal. Note, + ** reiserfs_in_journal might reject the block it suggests. The big + ** gain from the suggestion is when a big file has been deleted, and + ** many blocks show free in the real bitmap, but are all not free + ** in the journal list bitmaps. + ** + ** this whole system sucks. The bitmaps should reflect exactly what + ** can and can't be allocated, and the journal should update them as + ** it goes. TODO. + */ + if (j < (s->s_blocksize << 3)) { + block_to_try = (i * (s->s_blocksize << 3)) + j; + + /* the block is not in the journal, we can proceed */ + if (!(reiserfs_in_journal(s, s->s_dev, block_to_try, s->s_blocksize, for_unformatted, &next_block_to_try))) { + *bmap_nr = i; + *offset = j; + return 1; + } + /* the block is in the journal */ + else if ((j+1) < (s->s_blocksize << 3)) { /* try again */ + /* reiserfs_in_journal suggested a new block to try */ + if (next_block_to_try > 0) { + int new_i ; + get_bit_address (s, next_block_to_try, &new_i, offset); + + /* block is not in this bitmap. reset i and continue + ** we only reset i if new_i is in a later bitmap. 
+ */ + if (new_i > i) { + i = (new_i - 1 ); /* i gets incremented by the for loop */ + continue ; + } + } else { + /* no suggestion was made, just try the next block */ + *offset = j+1 ; + } + goto retry ; + } + } + } + /* zero bit not found */ + return 0; +} + +/* return 0 if no free blocks, else return 1 */ +/* The function is NOT SCHEDULE-SAFE! +** because the bitmap block we want to change could be locked, and on its +** way to the disk when we want to read it, and because of the +** flush_async_commits. Per bitmap block locks won't help much, and +** really aren't needed, as we retry later on if we try to set the bit +** and it is already set. +*/ +static int find_zero_bit_in_bitmap (struct super_block * s, + unsigned long search_start, + int * bmap_nr, int * offset, + int for_unformatted) +{ + int retry_count = 0 ; + /* get bit location (bitmap number and bit offset) of search_start block */ + get_bit_address (s, search_start, bmap_nr, offset); + + /* note that we search forward in the bitmap, benchmarks have shown that it is better to allocate in increasing + sequence, which is probably due to the disk spinning in the forward direction.. */ + if (find_forward (s, bmap_nr, offset, for_unformatted) == 0) { + /* there wasn't a free block with number greater than our + starting point, so we are going to go to the beginning of the disk */ + +retry: + search_start = 0; /* caller will reset search_start for itself also. */ + get_bit_address (s, search_start, bmap_nr, offset); + if (find_forward (s, bmap_nr,offset,for_unformatted) == 0) { + if (for_unformatted) { + if (retry_count == 0) { + /* we've got a chance that flushing async commits will free up + ** some space. Sync then retry + */ + flush_async_commits(s) ; + retry_count++ ; + goto retry ; + } else if (retry_count > 0) { + /* nothing more we can do. Make the others wait, flush + ** all log blocks to disk, and flush to their home locations. 
+ ** this will free up any blocks held by the journal + */ + SB_JOURNAL(s)->j_must_wait = 1 ; + } + } + return 0; + } + } + return 1; +} + +/* get amount_needed free block numbers from scanning the bitmap of + free/used blocks. + + Optimize layout by trying to find them starting from search_start + and moving in increasing blocknr direction. (This was found to be + faster than using a bi-directional elevator_direction, in part + because of disk spin direction, in part because by the time one + reaches the end of the disk the beginning of the disk is the least + congested). + + search_start is the block number of the left + semantic neighbor of the node we create. + + return CARRY_ON if everything is ok + return NO_DISK_SPACE if out of disk space + return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS if the block we found is not contiguous to the last one + + return block numbers found, in the array free_blocknrs. assumes + that any non-zero entries already present in the array are valid. + This feature is perhaps convenient coding when one might not have + used all blocknrs from the last time one called this function, or + perhaps it is an archaism from the days of schedule tracking, one + of us ought to reread the code that calls this, and analyze whether + it is still the right way to code it. + + spare space is used only when priority is set to 1. reiserfsck has + its own reiserfs_new_blocknrs, which can use reserved space + + Give example of who uses spare space, and say that it is a deadlock + avoidance mechanism. -Hans */ + +/* This function is NOT SCHEDULE-SAFE! 
*/ + +static int do_reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * free_blocknrs, + unsigned long search_start, + int amount_needed, int priority, + int for_unformatted, + int for_prealloc) +{ + struct super_block * s = th->t_super; + int i, j; + unsigned long * block_list_start = free_blocknrs; + int init_amount_needed = amount_needed; + unsigned long new_block = 0 ; + + if (SB_FREE_BLOCKS (s) < SPARE_SPACE && !priority) + /* we can answer NO_DISK_SPACE being asked for new block with + priority 0 */ + return NO_DISK_SPACE; + +#ifdef CONFIG_REISERFS_CHECK + if (!s) + reiserfs_panic (s, "vs-4090: reiserfs_new_blocknrs: trying to get new block from nonexistent device"); + + if (search_start == MAX_B_NUM) + reiserfs_panic (s, "vs-4100: reiserfs_new_blocknrs: we are optimizing location based on " + "the bogus location of a temp buffer (%lu).", search_start); + + if (amount_needed < 1 || amount_needed > 2) + reiserfs_panic (s, "vs-4110: reiserfs_new_blocknrs: amount_needed parameter incorrect (%d)", amount_needed); +#endif /* CONFIG_REISERFS_CHECK */ + + /* We continue the while loop if another process snatches our found + * free block from us after we find it but before we successfully + * mark it as in use, or if we need to use sync to free up some + * blocks on the preserve list. */ + + while (amount_needed--) { + /* skip over any blocknrs already gotten last time. */ + if (*(free_blocknrs) != 0) { +#ifdef CONFIG_REISERFS_CHECK + if (is_reusable (s, *free_blocknrs, 1) == 0) + reiserfs_panic(s, "vs-4120: reiserfs_new_blocknrs: bad blocknr on free_blocknrs list"); +#endif /* CONFIG_REISERFS_CHECK */ + free_blocknrs++; + continue; + } + /* look for zero bits in bitmap */ + if (find_zero_bit_in_bitmap(s,search_start, &i, &j,for_unformatted) == 0) { + if (find_zero_bit_in_bitmap(s,search_start,&i,&j, for_unformatted) == 0) { + /* recode without the goto and without + the if. It will require a + duplicate for. 
This is worth the + code clarity. Your way was + admirable, and just a bit too + clever in saving instructions.:-) + I'd say create a new function, but + that would slow things also, yes? + -Hans */ +free_and_return: + for ( ; block_list_start != free_blocknrs; block_list_start++) { + reiserfs_free_block (th, *block_list_start); + *block_list_start = 0; + } + if (for_prealloc) + return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS; + else + return NO_DISK_SPACE; + } + } + + /* i and j now contain the results of the search. i = bitmap block + number containing free block, j = offset in this block. we + compute the blocknr which is our result, store it in + free_blocknrs, and increment the pointer so that on the next + loop we will insert into the next location in the array. Also + in preparation for the next loop, search_start is changed so + that the next search will not rescan the same range but will + start where this search finished. Note that while it is + possible that schedule has occurred and blocks have been freed + in that range, it is perhaps more important that the blocks + returned be near each other than that they be near their other + neighbors, and it also simplifies and speeds the code this way. */ + + /* journal: we need to make sure the block we are giving out is not + ** a log block, horrible things would happen there. + */ + new_block = (i * (s->s_blocksize << 3)) + j; + if (for_prealloc && (new_block - 1) != search_start) { + /* preallocated blocks must be contiguous, bail if we didnt find one. + ** this is not a bug. We want to do the check here, before the + ** bitmap block is prepared, and before we set the bit and log the + ** bitmap. + ** + ** If we do the check after this function returns, we have to + ** call reiserfs_free_block for new_block, which would be pure + ** overhead. + ** + ** for_prealloc should only be set if the caller can deal with the + ** NO_MORE_UNUSED_CONTIGUOUS_BLOCKS return value. 
This can be + ** returned before the disk is actually full + */ + goto free_and_return ; + } + search_start = new_block ; + if (search_start >= reiserfs_get_journal_block(s) && + search_start < (reiserfs_get_journal_block(s) + JOURNAL_BLOCK_COUNT)) { + reiserfs_warning("vs-4130: reiserfs_new_blocknrs: trying to allocate log block %lu\n", + search_start) ; + search_start++ ; + amount_needed++ ; + continue ; + } + + + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; + +#ifdef CONFIG_REISERFS_CHECK + if (buffer_locked (SB_AP_BITMAP (s)[i]) || is_reusable (s, search_start, 0) == 0) + reiserfs_panic (s, "vs-4140: reiserfs_new_blocknrs: bitmap block is locked or bad block number found"); +#endif + + /* if this bit was already set, we've scheduled, and someone else + ** has allocated it. loop around and try again + */ + if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) { + reiserfs_warning("vs-4150: reiserfs_new_blocknrs, block not free"); + reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ; + amount_needed++ ; + continue ; + } + journal_mark_dirty (th, s, SB_AP_BITMAP (s)[i]); + *free_blocknrs = search_start ; + free_blocknrs ++; + } + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + /* update free block count in super block */ + s->u.reiserfs_sb.s_rs->s_free_blocks = cpu_to_le32 (SB_FREE_BLOCKS (s) - init_amount_needed); + journal_mark_dirty (th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + + return CARRY_ON; +} + +// this is called only by get_empty_nodes with for_preserve_list==0 +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, + unsigned long search_start, int amount_needed) { + return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, amount_needed, 0/*for_preserve_list-priority*/, 0/*for_formatted*/, 0/*for_prealloc */) ; +} + + +// called by get_new_buffer and by reiserfs_get_block with amount_needed == 1 and for_preserve_list == 0 +int 
reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, + unsigned long search_start) { +#if 0 +#ifdef REISERFS_PREALLOCATE + unsigned long border = (SB_BLOCK_COUNT(th->t_super) / 10); + if ( search_start < border ) search_start=border; +#endif +#endif + + return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, + 1/*amount_needed*/, + 0/*for_preserve_list-priority*/, + 1/*for formatted*/, + 0/*for prealloc */) ; +} + +#ifdef REISERFS_PREALLOCATE + /* So do I understand correctly that + in this code we snag the largest + contiguous extent we can that is + not more than 128 blocks, and which + is at least 2 blocks? -Hans */ + + +/* + The function pre-allocate 8 blocks. We can change this later. + Pre-allocation is used for files > 16 KB only. +*/ + /* how about taking the time to explain preallocation here? -Hans */ +int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + unsigned long * free_blocknrs, + unsigned long search_start) +{ + int i, n, ret=0; + int allocated[8], blks; + /* Comments are okay with me to use. -Hans */ + + /* so why have we made this one / 10 */ + unsigned long bstart = (SB_BLOCK_COUNT(th->t_super) / 10); + /* and this one hashed? */ + /* how about we have a policy of explaining the rationale behind the algorithms for all of our code? */ + /* please perform benchmarking of + hashing by objectid instead of + k_dir_id, just to confirm that it + is helpful not harmful to put + related large files all together. + It might be harmful, I can argue it + both ways, we don't know until we + test. -Hans. */ + unsigned long btotal = SB_BLOCK_COUNT(th->t_super); + unsigned long border = (INODE_PKEY(p_s_inode)->k_dir_id) % + (btotal - bstart - 1) ; + border += bstart ; + + allocated[0] = 0 ; /* important. 
catches a good allocation when + * first prealloc works, and later one fails + */ + + /* It would be interesting to instead + try putting all unformatted nodes + after the first 1/3 of the disk and + benchmark, as it would put + formatted nodes closer to the log on + single disk drive machines. */ + if ( (p_s_inode->i_size < 4 * 4096) || + !(S_ISREG(p_s_inode->i_mode)) ) + { + if ( search_start < border ) search_start=border; + + ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, + 1/*amount_needed*/, + /* someone please remove the preserve list detritus. -Hans */ + 0/*for_preserve_list-priority*/, + 1/*for_formatted*/, + 0/*for prealloc */) ; + return ret; + } + + /* take a block off the prealloc list and return it -Hans */ + if (p_s_inode->u.reiserfs_i.i_prealloc_count > 0) { + p_s_inode->u.reiserfs_i.i_prealloc_count--; + *free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block++; + return ret; + } + + /* else get a new preallocation for the file */ + reiserfs_discard_prealloc (th, p_s_inode); + /* This does what? -Hans */ + if (search_start <= p_s_inode->u.reiserfs_i.i_prealloc_block) { + search_start = p_s_inode->u.reiserfs_i.i_prealloc_block; + } + + /* doing the compare again forces search_start to be >= the border, + ** even if the file already had prealloction done. This seems extra, + ** and should probably be removed + */ + if ( search_start < border ) search_start=border; + + *free_blocknrs = 0; + /* Don't use numbers like n, use numbers like PREALLOCATION_SIZE, and put them in the reiserfs_fs.h file. -Hans */ + n = 8; + blks = n-1; + for (i=0; i<n; i++) { + ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, + 1/*amount_needed*/, + 0/*for_preserve_list-priority*/, + 1/*for_formatted*/, + (i > 0)/*must_be_contiguous*/) ; + /* comment needed -Hans */ + if (ret != CARRY_ON) { + blks = i > 0 ? 
(i - 1) : 0 ; + break ; + } + allocated[i]= *free_blocknrs; +#ifdef CONFIG_REISERFS_CHECK + if ( (i>0) && (allocated[i] - allocated[i-1]) != 1 ) { + /* this is not a standard reiserfs_warning message -Hans */ + /* this should be caught by new_blocknrs now, checking code */ + /* use your email name of yura, not yr, and I don't believe you have written 4050 error messages.... -Hans */ + reiserfs_warning("yura-4160, reiserfs_new_unf_blocknrs2: pre-allocated not contiguous set of blocks!\n") ; + reiserfs_free_block(th, allocated[i]); + blks = i-1; + break; + } +#endif + if (i==0) { + p_s_inode->u.reiserfs_i.i_prealloc_block = *free_blocknrs; + } + search_start = *free_blocknrs; + *free_blocknrs = 0; + } + p_s_inode->u.reiserfs_i.i_prealloc_count = blks; + *free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block; + p_s_inode->u.reiserfs_i.i_prealloc_block++; + + /* we did actually manage to get 1 block */ + if (ret != CARRY_ON && allocated[0] > 0) { + return CARRY_ON ; + } + /* NO_MORE_UNUSED_CONTIGUOUS_BLOCKS should only mean something to + ** the preallocation code. The rest of the filesystem asks for a block + ** and should either get it, or know the disk is full. The code + ** above should never allow ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCK, + ** as it doesn't send for_prealloc = 1 to do_reiserfs_new_blocknrs + ** unless it has already successfully allocated at least one block. + ** Just in case, we translate into a return value the rest of the + ** filesystem can understand. 
+ */ + if (ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCKS) { + return NO_DISK_SPACE ; + } + return ret; +} + + +// +// this is ext2_discard_prealloc +// +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode) +{ + if (inode->u.reiserfs_i.i_prealloc_count > 0) { + while (inode->u.reiserfs_i.i_prealloc_count--) { + reiserfs_free_block(th,inode->u.reiserfs_i.i_prealloc_block); + inode->u.reiserfs_i.i_prealloc_block++; + } + } + inode->u.reiserfs_i.i_prealloc_count = 0; +} +#endif diff -u -r --new-file linux/fs/reiserfs/buffer2.c v2.4.0-test8/linux/fs/reiserfs/buffer2.c --- linux/fs/reiserfs/buffer2.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/buffer2.c Sun May 14 23:37:04 2000 @@ -0,0 +1,471 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + + +/* + * Contains code from + * + * linux/include/linux/lock.h and linux/fs/buffer.c /linux/fs/minix/fsync.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/locks.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + + +/* + * wait_buffer_until_released + * reiserfs_bread + * reiserfs_getblk + * reiserfs_journal_end_io + * reiserfs_end_io_task + * reiserfs_end_buffer_io_sync + * get_new_buffer + */ + + + +/* when we allocate a new block (get_new_buffer, get_empty_nodes) and + get buffer for it, it is possible that it is held by someone else + or even by this process. In this function we wait until all other + holders release buffer. 
To make sure, that current process does not + hold we did free all buffers in tree balance structure + (get_empty_nodes and get_nodes_for_preserving) or in path structure + only (get_new_buffer) just before calling this */ +void wait_buffer_until_released (struct buffer_head * bh) +{ + int repeat_counter = 0; + + while (atomic_read (&(bh->b_count)) > 1) { + + if ( !(++repeat_counter % 30000000) ) { + reiserfs_warning ("vs-3050: wait_buffer_until_released: nobody releases buffer (%b). Still waiting (%d) %cJDIRTY %cJWAIT\n", + bh, repeat_counter, buffer_journaled(bh) ? ' ' : '!', + buffer_journal_dirty(bh) ? ' ' : '!'); + } + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + /*current->counter = 0;*/ + schedule(); + } + if (repeat_counter > 30000000) { + reiserfs_warning("vs-3051: done waiting on buffer (%b)\n", bh) ; + } +} + + +/* no longer need, should just make journal.c use the default handler */ +void reiserfs_journal_end_io (struct buffer_head *bh, int uptodate) +{ + mark_buffer_uptodate(bh, uptodate); + unlock_buffer(bh); + return ; +} + + +/* struct used to service end_io events. kmalloc'd in +** reiserfs_end_buffer_io_sync +*/ +struct reiserfs_end_io { + struct buffer_head *bh ; /* buffer head to check */ + struct tq_struct task ; /* task struct to use */ + struct reiserfs_end_io *self ; /* pointer to this struct for kfree to use */ +} ; + +/* +** does the hash list updating required to release a buffer head. +** must not be called at interrupt time (so I can use the non irq masking +** spinlocks). 
Right now, put onto the schedule task queue, one for +** each block that gets written +*/ +static void reiserfs_end_io_task(struct reiserfs_end_io *io) { + struct buffer_head *bh = io->bh ; + int windex = push_journal_writer("end_io_task") ; + + if (buffer_journal_dirty(bh)) { + struct reiserfs_journal_cnode *cur ; + struct super_block * s = get_super (bh->b_dev); + + if (!s) + goto done ; + + if (!buffer_journal_dirty(bh)) { + goto done ; + } + mark_buffer_notjournal_dirty(bh) ; + cur = (journal_hash(SB_JOURNAL(s)->j_list_hash_table, bh->b_dev, bh->b_blocknr)) ; + while(cur) { + if (cur->bh && cur->blocknr == bh->b_blocknr && cur->dev == bh->b_dev) { + if (cur->jlist) { /* since we are clearing the bh, we must decrement nonzerolen */ + atomic_dec(&(cur->jlist->j_nonzerolen)) ; + } + cur->bh = NULL ; + } + cur = cur->hnext ; + } + atomic_dec(&(bh->b_count)) ; + } +done: + kfree(io->self) ; + pop_journal_writer(windex) ; + brelse(bh) ; + return ; +} + +/* +** general end_io routine for all reiserfs blocks. +** logged blocks will come in here marked buffer_journal_dirty() +** a reiserfs_end_io struct is kmalloc'd for them, and a task is put +** on the scheduler queue. 
It then does all the required hash table +** operations to reflect the buffer as written */ +void reiserfs_end_buffer_io_sync (struct buffer_head *bh, int uptodate) +{ + + mark_buffer_notjournal_new(bh) ; + if (buffer_journal_dirty(bh)) { + struct reiserfs_end_io *io = kmalloc(sizeof(struct reiserfs_end_io), + GFP_ATOMIC) ; + /* note, if kmalloc fails, this buffer will be taken care of + ** by a check at the end of do_journal_end() in journal.c + */ + if (io) { + io->task.next = NULL ; + io->task.sync = 0 ; + io->task.routine = (void *)(void *)reiserfs_end_io_task ; + io->task.data = io ; + io->self = io ; + io->bh = bh ; + atomic_inc(&(bh->b_count)) ; + queue_task(&(io->task), &reiserfs_end_io_tq) ; + } else { + printk("reiserfs/buffer.c-184: kmalloc returned NULL block %lu\n", + bh->b_blocknr) ; + } + } + mark_buffer_uptodate(bh, uptodate); + unlock_buffer(bh); +} + + +/* + * reiserfs_bread() reads a specified block and returns the buffer that contains + * it. It returns NULL if the block was unreadable. + */ +/* It first tries to find the block in cache, and if it cannot do so + then it creates a new buffer and schedules I/O to read the + block. */ +/* The function is NOT SCHEDULE-SAFE! */ + +struct buffer_head * reiserfs_bread (kdev_t n_dev, int n_block, int n_size) +{ + struct buffer_head * bh; + + bh = bread (n_dev, n_block, n_size); + if (bh) { + bh->b_end_io = reiserfs_end_buffer_io_sync; + } + return bh; +} + + +/* This function looks for a buffer which contains a given block. If + the block is in cache it returns it, otherwise it returns a new + buffer which is not uptodate. This is called by reiserfs_bread and + other functions. Note that get_new_buffer ought to be called this + and this ought to be called get_new_buffer, since this doesn't + actually get the block off of the disk. */ +/* The function is NOT SCHEDULE-SAFE! 
*/ + +struct buffer_head * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size) +{ + struct buffer_head * bh; + + bh = getblk (n_dev, n_block, n_size); + if (bh) { + bh->b_end_io = reiserfs_end_buffer_io_sync ; + } + return bh; +} + + + + +#ifdef NEW_GET_NEW_BUFFER + +/* returns one buffer with a blocknr near blocknr. */ +static int get_new_buffer_near_blocknr( + struct super_block * p_s_sb, + int blocknr, + struct buffer_head ** pp_s_new_bh, + struct path * p_s_path + ) { + unsigned long n_new_blocknumber = 0; + int n_ret_value, + n_repeat = CARRY_ON; + +#ifdef CONFIG_REISERFS_CHECK + int repeat_counter = 0; + + if (!blocknr) + printk ("blocknr passed to get_new_buffer_near_blocknr was 0"); +#endif + + + if ( (n_ret_value = reiserfs_new_blocknrs (p_s_sb, &n_new_blocknumber, + blocknr, 1)) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize); + if ( buffer_uptodate(*pp_s_new_bh) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) { + reiserfs_panic(p_s_sb, "PAP-14080: get_new_buffer: invalid uptodate buffer %b for the new block", *pp_s_new_bh); + } +#endif + + /* Free path buffers to prevent deadlock. */ + /* It is possible that this process has the buffer, which this function is getting, already in + its path, and is responsible for double incrementing the value of b_count. If we recalculate + the path after schedule we can avoid risking an endless loop. This problematic situation is + possible in a multiple processing environment. Suppose process 1 has acquired a path P; then + process 2 balanced and remove block A from the tree. Process 1 continues and runs + get_new_buffer, that returns buffer with block A. If node A was on the path P, then it will + have b_count == 2. 
If we now will simply wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get + into an endless loop, as nobody will release this buffer and the current process holds buffer + twice. That is why we do decrement_counters_in_path(p_s_path) before waiting until b_count + becomes 1. (if there were other processes holding node A, then eventually we will get a + moment, when all of them released a buffer). */ + if ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1 ) { + decrement_counters_in_path(p_s_path); + n_ret_value |= SCHEDULE_OCCURRED; + } + + while ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1 ) { + +#ifdef REISERFS_INFO + printk("get_new_buffer() calls schedule to decrement b_count\n"); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( ! (++repeat_counter % 10000) ) + printk("get_new_buffer(%u): counter(%d) too big", current->pid, repeat_counter); +#endif + + current->counter = 0; + schedule(); + } + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) { + print_buffer_head(*pp_s_new_bh,"get_new_buffer"); + reiserfs_panic(p_s_sb, "PAP-14090: get_new_buffer: invalid uptodate buffer %b for the new block(case 2)", *pp_s_new_bh); + } +#endif + + } + else { + ; + +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&((*pp_s_new_bh)->b_count)) != 1) { + reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not uptodate buffer %b for the new block has b_count more than one", + *pp_s_new_bh); + } +#endif + + } + return (n_ret_value | n_repeat); +} + + +/* returns the block number of the last unformatted node, assumes p_s_key_to_search.k_offset is a byte in the tail of + the file, Useful for when you want to append to a file, and convert a direct item into an unformatted node near the + last unformatted node of the file. Putting the unformatted node near the direct item is potentially very bad to do. + If there is no unformatted node in the file, then we return the block number of the direct item. */ +/* The function is NOT SCHEDULE-SAFE! 
*/ +inline int get_last_unformatted_node_blocknr_of_file( struct key * p_s_key_to_search, struct super_block * p_s_sb, + struct buffer_head * p_s_bh + struct path * p_unf_search_path, struct inode * p_s_inode) + +{ + struct key unf_key_to_search; + struct item_head * p_s_ih; + int n_pos_in_item; + struct buffer_head * p_indirect_item_bh; + + copy_key(&unf_key_to_search,p_s_key_to_search); + unf_key_to_search.k_uniqueness = TYPE_INDIRECT; + unf_key_to_search.k_offset = p_s_inode->u.reiserfs_i.i_first_direct_byte - 1; + + /* p_s_key_to_search->k_offset - MAX_ITEM_LEN(p_s_sb->s_blocksize); */ + if (search_for_position_by_key (p_s_sb, &unf_key_to_search, p_unf_search_path, &n_pos_in_item) == POSITION_FOUND) + { + p_s_ih = B_N_PITEM_HEAD(p_indirect_item_bh = PATH_PLAST_BUFFER(p_unf_search_path), PATH_LAST_POSITION(p_unf_search_path)); + return (B_I_POS_UNFM_POINTER(p_indirect_item_bh, p_s_ih, n_pos_in_item)); + } + /* else */ + printk("reiser-1800: search for unformatted node failed, p_s_key_to_search->k_offset = %u, unf_key_to_search.k_offset = %u, MAX_ITEM_LEN(p_s_sb->s_blocksize) = %ld, debug this\n", p_s_key_to_search->k_offset, unf_key_to_search.k_offset, MAX_ITEM_LEN(p_s_sb->s_blocksize) ); + print_buffer_head(PATH_PLAST_BUFFER(p_unf_search_path), "the buffer holding the item before the key we failed to find"); + print_block_head(PATH_PLAST_BUFFER(p_unf_search_path), "the block head"); + return 0; /* keeps the compiler quiet */ +} + + + /* hasn't been out of disk space tested */ +/* The function is NOT SCHEDULE-SAFE! 
*/ +static int get_buffer_near_last_unf ( struct super_block * p_s_sb, struct key * p_s_key_to_search, + struct inode * p_s_inode, struct buffer_head * p_s_bh, + struct buffer_head ** pp_s_un_bh, struct path * p_s_search_path) +{ + int unf_blocknr = 0, /* blocknr from which we start search for a free block for an unformatted node, if 0 + then we didn't find an unformatted node though we might have found a file hole */ + n_repeat = CARRY_ON; + struct key unf_key_to_search; + struct path unf_search_path; + + copy_key(&unf_key_to_search,p_s_key_to_search); + unf_key_to_search.k_uniqueness = TYPE_INDIRECT; + + if ( + (p_s_inode->u.reiserfs_i.i_first_direct_byte > 4095) /* i_first_direct_byte gets used for all sorts of + crap other than what the name indicates, thus + testing to see if it is 0 is not enough */ + && (p_s_inode->u.reiserfs_i.i_first_direct_byte < MAX_KEY_OFFSET) /* if there is no direct item then + i_first_direct_byte = MAX_KEY_OFFSET */ + ) + { + /* actually, we don't want the last unformatted node, we want the last unformatted node + which is before the current file offset */ + unf_key_to_search.k_offset = ((p_s_inode->u.reiserfs_i.i_first_direct_byte -1) < unf_key_to_search.k_offset) ? p_s_inode->u.reiserfs_i.i_first_direct_byte -1 : unf_key_to_search.k_offset; + + while (unf_key_to_search.k_offset > -1) + { + /* This is our poorly documented way of initializing paths. -Hans */ + init_path (&unf_search_path); + /* get the blocknr from which we start the search for a free block. 
*/ + unf_blocknr = get_last_unformatted_node_blocknr_of_file( p_s_key_to_search, /* assumes this points to the file tail */ + p_s_sb, /* lets us figure out the block size */ + p_s_bh, /* if there is no unformatted node in the file, + then it returns p_s_bh->b_blocknr */ + &unf_search_path, + p_s_inode + ); +/* printk("in while loop: unf_blocknr = %d, *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */ + if (unf_blocknr) + break; + else /* release the path and search again, this could be really slow for huge + holes.....better to spend the coding time adding compression though.... -Hans */ + { + /* Vladimir, is it a problem that I don't brelse these buffers ?-Hans */ + decrement_counters_in_path(&unf_search_path); + unf_key_to_search.k_offset -= 4096; + } + } + if (unf_blocknr) { + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, unf_blocknr, pp_s_un_bh, p_s_search_path); + } + else { /* all unformatted nodes are holes */ + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path); + } + } + else { /* file has no unformatted nodes */ + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path); +/* printk("in else: unf_blocknr = %d, *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */ +/* print_path (0, p_s_search_path); */ + } + + return n_repeat; +} + +#endif /* NEW_GET_NEW_BUFFER */ + + +#ifdef OLD_GET_NEW_BUFFER + +/* The function is NOT SCHEDULE-SAFE! 
*/ +int get_new_buffer( + struct reiserfs_transaction_handle *th, + struct buffer_head * p_s_bh, + struct buffer_head ** pp_s_new_bh, + struct path * p_s_path + ) { + unsigned long n_new_blocknumber = 0; + int n_repeat; + struct super_block * p_s_sb = th->t_super; + + if ( (n_repeat = reiserfs_new_unf_blocknrs (th, &n_new_blocknumber, p_s_bh->b_blocknr)) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize); + if (atomic_read (&(*pp_s_new_bh)->b_count) > 1) { + /* Free path buffers to prevent deadlock which can occur in the + situation like : this process holds p_s_path; Block + (*pp_s_new_bh)->b_blocknr is on the path p_s_path, but it is + not necessary, that *pp_s_new_bh is in the tree; process 2 + could remove it from the tree and freed block + (*pp_s_new_bh)->b_blocknr. Reiserfs_new_blocknrs in above + returns block (*pp_s_new_bh)->b_blocknr. Reiserfs_getblk gets + buffer for it, and it has b_count > 1. If we now will simply + wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get into an + endless loop, as nobody will release this buffer and the + current process holds buffer twice. That is why we do + decrement_counters_in_path(p_s_path) before waiting until + b_count becomes 1. (if there were other processes holding node + pp_s_new_bh, then eventually we will get a moment, when all of + them released a buffer). 
*/ + decrement_counters_in_path(p_s_path); + wait_buffer_until_released (*pp_s_new_bh); + n_repeat |= SCHEDULE_OCCURRED; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( atomic_read (&((*pp_s_new_bh)->b_count)) != 1 || buffer_dirty (*pp_s_new_bh)) { + reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not free or dirty buffer %b for the new block", + *pp_s_new_bh); + } +#endif + + return n_repeat; +} + +#endif /* OLD_GET_NEW_BUFFER */ + + +#ifdef GET_MANY_BLOCKNRS + /* code not yet functional */ +get_next_blocknr ( + unsigned long * p_blocknr_array, /* we get a whole bunch of blocknrs all at once for + the write. This is better than getting them one at + a time. */ + unsigned long ** p_blocknr_index, /* pointer to current offset into the array. */ + unsigned long blocknr_array_length +) +{ + unsigned long return_value; + + if (*p_blocknr_index < p_blocknr_array + blocknr_array_length) { + return_value = **p_blocknr_index; + **p_blocknr_index = 0; + *p_blocknr_index++; + return (return_value); + } + else + { + kfree (p_blocknr_array); + } +} +#endif /* GET_MANY_BLOCKNRS */ + diff -u -r --new-file linux/fs/reiserfs/dir.c v2.4.0-test8/linux/fs/reiserfs/dir.c --- linux/fs/reiserfs/dir.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/dir.c Fri Aug 25 18:24:30 2000 @@ -0,0 +1,243 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +#ifdef __KERNEL__ + +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/reiserfs_fs.h> +#include <linux/stat.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> + +#else + +#include "nokernel.h" + +#endif + +extern struct key MIN_KEY; + +static int reiserfs_readdir (struct file *, void *, filldir_t); +int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) ; + +struct file_operations reiserfs_dir_operations = { + read: generic_read_dir, + readdir: reiserfs_readdir, + fsync: reiserfs_dir_fsync, +}; + +/* + * 
directories can handle most operations... + */ +struct inode_operations reiserfs_dir_inode_operations = { + //&reiserfs_dir_operations, /* default_file_ops */ + create: reiserfs_create, + lookup: reiserfs_lookup, + link: reiserfs_link, + unlink: reiserfs_unlink, + symlink: reiserfs_symlink, + mkdir: reiserfs_mkdir, + rmdir: reiserfs_rmdir, + mknod: reiserfs_mknod, + rename: reiserfs_rename, +}; + +int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { + int ret = 0 ; + int windex ; + struct reiserfs_transaction_handle th ; + + journal_begin(&th, dentry->d_inode->i_sb, 1) ; + windex = push_journal_writer("dir_fsync") ; + reiserfs_prepare_for_journal(th.t_super, SB_BUFFER_WITH_SB(th.t_super), 1) ; + journal_mark_dirty(&th, dentry->d_inode->i_sb, SB_BUFFER_WITH_SB (dentry->d_inode->i_sb)) ; + pop_journal_writer(windex) ; + journal_end_sync(&th, dentry->d_inode->i_sb, 1) ; + + return ret ; +} + + +#define store_ih(where,what) copy_item_head (where, what) + +// +static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ + INITIALIZE_PATH (path_to_entry); + struct buffer_head * bh; + int item_num, entry_num; + struct key * rkey; + struct item_head * ih, tmp_ih; + int search_res; + char * local_buf; + loff_t next_pos; + char small_buf[32] ; /* avoid kmalloc if we can */ + struct reiserfs_dir_entry de; + + + reiserfs_check_lock_depth("readdir") ; + + /* form key for search the next directory entry using f_pos field of + file structure */ + make_cpu_key (&pos_key, inode, (filp->f_pos) ? 
(filp->f_pos) : DOT_OFFSET, + TYPE_DIRENTRY, 3); + next_pos = cpu_key_k_offset (&pos_key); + + /* reiserfs_warning ("reiserfs_readdir 1: f_pos = %Ld\n", filp->f_pos);*/ + + while (1) { + research: + /* search the directory item, containing entry with specified key */ + search_res = search_by_entry_key (inode->i_sb, &pos_key, &path_to_entry, &de); + if (search_res == IO_ERROR) { + // FIXME: we could just skip part of directory which could + // not be read + return -EIO; + } + entry_num = de.de_entry_num; + bh = de.de_bh; + item_num = de.de_item_num; + ih = de.de_ih; + store_ih (&tmp_ih, ih); + +#ifdef CONFIG_REISERFS_CHECK + /* we must have found item, that is item of this directory, */ + if (COMP_SHORT_KEYS (&(ih->ih_key), &pos_key)) + reiserfs_panic (inode->i_sb, "vs-9000: reiserfs_readdir: " + "found item %h does not match to dir we readdir %k", + ih, &pos_key); + + if (item_num > B_NR_ITEMS (bh) - 1) + reiserfs_panic (inode->i_sb, "vs-9005: reiserfs_readdir: " + "item_num == %d, item amount == %d", + item_num, B_NR_ITEMS (bh)); + + /* and entry must be not more than number of entries in the item */ + if (I_ENTRY_COUNT (ih) < entry_num) + reiserfs_panic (inode->i_sb, "vs-9010: reiserfs_readdir: " + "entry number is too big %d (%d)", + entry_num, I_ENTRY_COUNT (ih)); +#endif /* CONFIG_REISERFS_CHECK */ + + if (search_res == POSITION_FOUND || entry_num < I_ENTRY_COUNT (ih)) { + /* go through all entries in the directory item beginning from the entry, that has been found */ + struct reiserfs_de_head * deh = B_I_DEH (bh, ih) + entry_num; + + for (; entry_num < I_ENTRY_COUNT (ih); entry_num ++, deh ++) { + int d_reclen; + char * d_name; + off_t d_off; + ino_t d_ino; + + if (!de_visible (deh)) + /* it is hidden entry */ + continue; + d_reclen = entry_length (bh, ih, entry_num); + d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); + if (!d_name[d_reclen - 1]) + d_reclen = strlen (d_name); + + d_off = deh_offset (deh); + filp->f_pos = d_off ; + d_ino = deh_objectid (deh); + 
if (d_reclen <= 32) { + local_buf = small_buf ; + } else { + local_buf = kmalloc(d_reclen, GFP_BUFFER) ; + if (!local_buf) { + pathrelse (&path_to_entry); + return -ENOMEM ; + } + if (item_moved (&tmp_ih, &path_to_entry)) { + kfree(local_buf) ; + goto research; + } + } + // Note, that we copy name to user space via temporary + // buffer (local_buf) because filldir will block if + // user space buffer is swapped out. At that time + // entry can move to somewhere else + memcpy (local_buf, d_name, d_reclen); + if (filldir (dirent, d_name, d_reclen, d_off, d_ino, + DT_UNKNOWN) < 0) { + if (local_buf != small_buf) { + kfree(local_buf) ; + } + goto end; + } + if (local_buf != small_buf) { + kfree(local_buf) ; + } + + // next entry should be looked for with such offset + next_pos = deh_offset (deh) + 1; + + if (item_moved (&tmp_ih, &path_to_entry)) { + reiserfs_warning ("vs-9020: reiserfs_readdir " + "things are moving under hands. Researching..\n"); + goto research; + } + } /* for */ + } + + if (item_num != B_NR_ITEMS (bh) - 1) + // end of directory has been reached + goto end; + + /* item we went through is last item of node. Using right + delimiting key check is it directory end */ + rkey = get_rkey (&path_to_entry, inode->i_sb); + if (! comp_le_keys (rkey, &MIN_KEY)) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning ("vs-9025: reiserfs_readdir:" + "get_rkey failed. 
Researching..\n"); +#endif + /* set pos_key to key, that is the smallest and greater + that key of the last entry in the item */ + set_cpu_key_k_offset (&pos_key, next_pos); + continue; + } + + if ( COMP_SHORT_KEYS (rkey, &pos_key)) { + // end of directory has been reached + goto end; + } + + /* directory continues in the right neighboring block */ + set_cpu_key_k_offset (&pos_key, le_key_k_offset (ITEM_VERSION_1, rkey)); + + } /* while */ + + + end: + // FIXME: ext2_readdir does not reset f_pos + filp->f_pos = next_pos; + pathrelse (&path_to_entry); + reiserfs_check_path(&path_to_entry) ; + return 0; +} + + + + + + + + + + + + + + + + + + + + + diff -u -r --new-file linux/fs/reiserfs/do_balan.c v2.4.0-test8/linux/fs/reiserfs/do_balan.c --- linux/fs/reiserfs/do_balan.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/do_balan.c Mon Sep 11 05:21:49 2000 @@ -0,0 +1,2042 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +/* Now we have all buffers that must be used in balancing of the tree */ +/* Further calculations can not cause schedule(), and thus the buffer */ +/* tree will be stable until the balancing will be finished */ +/* balance the tree according to the analysis made before, */ +/* and using buffers obtained after all above. 
*/ + + +/** + ** balance_leaf_when_delete + ** balance_leaf + ** do_balance + ** + **/ + +#ifdef __KERNEL__ + +#include <asm/uaccess.h> +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + + +#ifdef CONFIG_REISERFS_CHECK + +struct tree_balance * cur_tb = NULL; /* detects whether more than one + copy of tb exists as a means + of checking whether schedule + is interrupting do_balance */ +#endif + + +inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, + struct buffer_head * bh, int flag) +{ + if (reiserfs_dont_log(tb->tb_sb)) { + if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { + __mark_buffer_dirty(bh) ; + tb->need_balance_dirty = 1; + } + } else { + int windex = push_journal_writer("do_balance") ; + journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ; + pop_journal_writer(windex) ; + } +} + +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + + +/* summary: + if deleting something ( tb->insert_size[0] < 0 ) + return(balance_leaf_when_delete()); (flag d handled here) + else + if lnum is larger than 0 we put items into the left node + if rnum is larger than 0 we put items into the right node + if snum1 is larger than 0 we put items into the new node s1 + if snum2 is larger than 0 we put items into the new node s2 +Note that all *num* count new items being created. + +It would be easier to read balance_leaf() if each of these summary +lines was a separate procedure rather than being inlined. I think +that there are many passages here and in balance_leaf_when_delete() in +which two calls to one procedure can replace two passages, and it +might save cache space and improve software maintenance costs to do so. 
+ +Vladimir made the perceptive comment that we should offload most of +the decision making in this function into fix_nodes/check_balance, and +then create some sort of structure in tb that says what actions should +be performed by do_balance. + +-Hans */ + + + +/* Balance leaf node in case of delete or cut: insert_size[0] < 0 + * + * lnum, rnum can have values >= -1 + * -1 means that the neighbor must be joined with S + * 0 means that nothing should be done with the neighbor + * >0 means to shift entirely or partly the specified number of items to the neighbor + */ +static int balance_leaf_when_delete (struct tree_balance * tb, int flag) +{ + struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + int item_pos = PATH_LAST_POSITION (tb->tb_path); + int pos_in_item = tb->tb_path->pos_in_item; + struct buffer_info bi; + int n; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1) + reiserfs_panic (tb->tb_sb, + "vs- 12000: balance_leaf_when_delete:level: wrong FR %z\n", tb->FR[0]); + if ( tb->blknum[0] > 1 ) + reiserfs_panic (tb->tb_sb, + "PAP-12005: balance_leaf_when_delete: " + "tb->blknum == %d, can not be > 1", tb->blknum[0]); + + if ( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0)) + reiserfs_panic (tb->tb_sb, "PAP-12010: balance_leaf_when_delete: tree can not be empty"); +#endif + + ih = B_N_PITEM_HEAD (tbS0, item_pos); + + /* Delete or truncate the item */ + + switch (flag) { + case M_DELETE: /* delete item in S[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (ih->ih_item_len) + IH_SIZE != -tb->insert_size [0]) + reiserfs_panic (tb->tb_sb, "vs-12013: balance_leaf_when_delete: " + "mode Delete, insert size %d, ih to be deleted %h", ih); + +#if 0 /* rigth delim key not supported */ + if ( ! item_pos && (! 
tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0))) ) { + print_cur_tb ("12015"); + reiserfs_panic (tb->tb_sb, "PAP-12015: balance_leaf_when_delete: L0's rkey does not match to 1st key of S0: " + "rkey in L %k, first key in S0 %k, rkey in CFL %k", + tb->L[0] ? B_PRIGHT_DELIM_KEY(tb->L[0]) : 0, + B_N_PKEY(tbS0, 0), + tb->CFL[0] ? B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]) : 0); + } +#endif + +#endif + + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_delete_items (&bi, 0, item_pos, 1, -1); + + if ( ! item_pos && tb->CFL[0] ) { + if ( B_NR_ITEMS(tbS0) ) { + replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); +#if 0 /* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + else { + if ( ! PATH_H_POSITION (tb->tb_path, 1) ) + replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0); +#if 0 /* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } + +#ifdef CONFIG_REISERFS_CHECK +#if 0 + if (! item_pos && (!tb->CFL[0] || !tb->L[0])) +#endif + if (! item_pos && !tb->CFL[0]) + reiserfs_panic (tb->tb_sb, "PAP-12020: balance_leaf_when_delete: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); +#endif + + break; + + case M_CUT: { /* cut item in S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + if (is_direntry_le_ih (ih)) { + +#ifdef CONFIG_REISERFS_CHECK +#if 0 /* right delim key support */ + if ( ! item_pos && ! pos_in_item && (! 
tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), + B_N_PKEY(tbS0, 0))) ) + reiserfs_panic(tb->tb_sb, "PAP-12025: balance_leaf_when_delete: illegal right delimiting key"); +#endif +#endif + + /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ + /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ + tb->insert_size[0] = -1; + leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); + +#ifdef CONFIG_REISERFS_CHECK + if (! item_pos && ! pos_in_item && ! tb->CFL[0]) + reiserfs_panic (tb->tb_sb, "PAP-12030: balance_leaf_when_delete: can not change delimiting key. CFL[0]=%p", tb->CFL[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) { + replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); +#if 0/* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } else { + leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); + +#ifdef CONFIG_REISERFS_CHECK + if (! ih->ih_item_len) + reiserfs_panic (tb->tb_sb, "PAP-12035: balance_leaf_when_delete: cut must leave non-zero dynamic length of item"); +#endif /* CONFIG_REISERFS_CHECK */ + } + break; + } + + default: + print_cur_tb ("12040"); + reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", + (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? 
"INSERT" : "UNKNOWN"), flag); + } + + /* the rule is that no shifting occurs unless by shifting a node can be freed */ + n = B_NR_ITEMS(tbS0); + if ( tb->lnum[0] ) /* L[0] takes part in balancing */ + { + if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */ + { + if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */ + { + if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) ) + { + /* all contents of all the 3 buffers will be in L[0] */ + if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) ) + replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1); + + /* update right_delimiting_key field */ +#if 0 + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), B_PRIGHT_DELIM_KEY (tb->R[0])); +#endif + leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, 0); + leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, 0); + +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->L[0]); + preserve_invalidate(tb, tb->R[0], tb->L[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + reiserfs_invalidate_buffer (tb, tb->R[0]); + + return 0; + } + /* all contents of all the 3 buffers will be in R[0] */ + leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, 0); + leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, 0); + + /* right_delimiting_key is correct in R[0] */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + +#if 0 + /* mark tb->R[0] as suspected recipient */ + preserve_invalidate(tb,tbS0, tb->R[0]); + preserve_invalidate(tb,tb->L[0], tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + reiserfs_invalidate_buffer (tb, tb->L[0]); + + return -1; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[0] != 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12045: balance_leaf_when_delete: " + "rnum must be 0 (%d)", tb->rnum[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + /* all contents of L[0] and S[0] will be in L[0] */ + leaf_shift_left(tb, n, -1); + +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->L[0]); /* preserved, shifting */ 
+#endif + reiserfs_invalidate_buffer (tb, tbS0); + + return 0; + } + /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (( tb->lnum[0] + tb->rnum[0] < n ) || ( tb->lnum[0] + tb->rnum[0] > n+1 )) + reiserfs_panic (tb->tb_sb, "PAP-12050: balance_leaf_when_delete: " + "rnum(%d) and lnum(%d) and item number in S[0] are not consistent", + tb->rnum[0], tb->lnum[0], n); + + if (( tb->lnum[0] + tb->rnum[0] == n ) && (tb->lbytes != -1 || tb->rbytes != -1)) + reiserfs_panic (tb->tb_sb, "PAP-12055: balance_leaf_when_delete: " + "bad rbytes (%d)/lbytes (%d) parameters when items are not split", + tb->rbytes, tb->lbytes); + if (( tb->lnum[0] + tb->rnum[0] == n + 1 ) && (tb->lbytes < 1 || tb->rbytes != -1)) + reiserfs_panic (tb->tb_sb, "PAP-12060: balance_leaf_when_delete: " + "bad rbytes (%d)/lbytes (%d) parameters when items are split", + tb->rbytes, tb->lbytes); +#endif + + leaf_shift_left (tb, tb->lnum[0], tb->lbytes); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); + +#if 0/*preserve list*/ + preserve_invalidate (tb, tbS0, tb->L[0]); + mark_suspected_recipient (tb->tb_sb, tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + + return 0; + } + + if ( tb->rnum[0] == -1 ) { + /* all contents of R[0] and S[0] will be in R[0] */ + leaf_shift_right(tb, n, -1); +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + return 0; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12065: balance_leaf_when_delete: " + "bad rnum parameter must be 0 (%d)", tb->rnum[0]); +#endif + + return 0; +} + + +static int balance_leaf (struct tree_balance * tb, + struct item_head * ih, /* item header of inserted item (this is on little endian) */ + const char * body, /* body of inserted item or bytes to paste */ + int flag, /* i - insert, d - delete, c - cut, p - paste + (see comment to do_balance) */ + struct 
item_head * insert_key, /* in our processing of one level we sometimes determine what + must be inserted into the next higher level. This insertion + consists of a key or two keys and their corresponding + pointers */ + struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */ + ) +{ + struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#if 0/*preserve list*/ + struct buffer_head * tbF0 = PATH_H_PPARENT (tb->tb_path, 0); + int S0_b_item_order = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); +#endif + int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0] + of the affected item */ + struct buffer_info bi; + struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ + int snum[2]; /* number of items that will be placed + into S_new (includes partially shifted + items) */ + int sbytes[2]; /* if an item is partially shifted into S_new then + if it is a directory item + it is the number of entries from the item that are shifted into S_new + else + it is the number of bytes from the item that are shifted into S_new + */ + int n, i; + int ret_val; + int pos_in_item; + int zeros_num; + +#if 0 + if (tb->insert_size [0] % 4) { + reiserfs_panic (tb->tb_sb, "balance_leaf: wrong insert_size %d", + tb->insert_size [0]); + } +#endif + /* Make balance in case insert_size[0] < 0 */ + if ( tb->insert_size[0] < 0 ) + return balance_leaf_when_delete (tb, flag); + + zeros_num = 0; + if (flag == M_INSERT && body == 0) + zeros_num = le16_to_cpu (ih->ih_item_len); + + pos_in_item = tb->tb_path->pos_in_item; + /* for indirect item pos_in_item is measured in unformatted node + pointers. 
Recalculate to bytes */ + if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) + pos_in_item *= UNFM_P_SIZE; + + if ( tb->lnum[0] > 0 ) { + /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ + if ( item_pos < tb->lnum[0] ) { + /* new item or it part falls to L[0], shift it too */ + n = B_NR_ITEMS(tb->L[0]); + + switch (flag) { + case M_INSERT: /* insert item into L[0] */ + + if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { + /* part of new item falls into L[0] */ + int new_item_len; + int version; + +#ifdef CONFIG_REISERFS_CHECK + if (!is_direct_le_ih (ih)) + reiserfs_panic (tb->tb_sb, "PAP-12075: balance_leaf: " + "only direct inserted item can be broken. %h", ih); +#endif + ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); + /* when reading the if conditions preceding the subsequent preserve_shifted + lines understand that their goal is to determine if all that we are + shifting is the new data being added */ +#if 0/*preserve list*/ + if (tb->lnum[0] - 1 > 0) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + + /* Calculate item length to insert to S[0] */ + new_item_len = le16_to_cpu (ih->ih_item_len) - tb->lbytes; + /* Calculate and check item length to insert to L[0] */ + ih->ih_item_len -= new_item_len; + +#ifdef CONFIG_REISERFS_CHECK + if ( (int)(ih->ih_item_len) <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12080: balance_leaf: " + "there is nothing to insert into L[0]: ih_item_len=%d", + (int)ih->ih_item_len); +#endif + + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, + zeros_num > ih->ih_item_len ? 
ih->ih_item_len : zeros_num); + + version = ih_version (ih); + + /* Calculate key component, item length and body to insert into S[0] */ + set_le_key_k_offset (ih_version (ih), &(ih->ih_key), + le_key_k_offset (ih_version (ih), &(ih->ih_key)) + tb->lbytes); + ih->ih_item_len = cpu_to_le16 (new_item_len); + if ( tb->lbytes > zeros_num ) { + body += (tb->lbytes - zeros_num); + zeros_num = 0; + } + else + zeros_num -= tb->lbytes; + +#ifdef CONFIG_REISERFS_CHECK + if ( (int)(ih->ih_item_len) <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12085: balance_leaf: " + "there is nothing to insert into S[0]: ih_item_len=%d", + (int)ih->ih_item_len); +#endif + } else { + /* new item in whole falls into L[0] */ + /* Shift lnum[0]-1 items to L[0] */ + ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes); +#if 0/*preserve list*/ + if (tb->lnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + tb->insert_size[0] = 0; + zeros_num = 0; + } + break; + + case M_PASTE: /* append item in L[0] */ + + if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { + /* we must shift the part of the appended item */ + if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) { + +#ifdef CONFIG_REISERFS_CHECK + if ( zeros_num ) + reiserfs_panic(tb->tb_sb, "PAP-12090: balance_leaf: illegal parameter in case of a directory"); +#endif + + /* directory item */ + if ( tb->lbytes > pos_in_item ) { + /* new directory entry falls into L[0] */ + struct item_head * pasted; + int l_pos_in_item = pos_in_item; + + /* Shift lnum[0] - 1 items 
in whole. Shift lbytes - 1 entries from given directory item */ + ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + if ( ret_val && ! item_pos ) { + pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1); + l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1); + } + + /* Append given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item, + tb->insert_size[0], body, zeros_num); + + /* previous string prepared space for pasting new entry, following string pastes this entry */ + + /* when we have merge directory item, pos_in_item has been changed too */ + + /* paste new directory entry. 1 is entry number */ + leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1, + (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + tb->insert_size[0] = 0; + } else { + /* new directory item doesn't fall into L[0] */ + /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ + leaf_shift_left (tb, tb->lnum[0], tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + /* Calculate new position to append in item body */ + pos_in_item -= tb->lbytes; + } + else { + /* regular object */ + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lbytes <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12095: balance_leaf: " + "there is nothing to shift to L[0]. 
lbytes=%d", + tb->lbytes); + if ( pos_in_item != B_N_PITEM_HEAD(tbS0, item_pos)->ih_item_len ) + reiserfs_panic(tb->tb_sb, "PAP-12100: balance_leaf: " + "incorrect position to paste: item_len=%d, pos_in_item=%d", + B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len, pos_in_item); +#endif + + if ( tb->lbytes >= pos_in_item ) { + /* appended item will be in L[0] in whole */ + int l_n; + + /* this bytes number must be appended to the last item of L[h] */ + l_n = tb->lbytes - pos_in_item; + + /* Calculate new insert_size[0] */ + tb->insert_size[0] -= l_n; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12105: balance_leaf: " + "there is nothing to paste into L[0]. insert_size=%d", + tb->insert_size[0]); +#endif + + ret_val = leaf_shift_left(tb,tb->lnum[0], + B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer( + &bi,n + item_pos - ret_val, + B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)->ih_item_len, + l_n,body, zeros_num > l_n ? 
l_n : zeros_num + ); + +#ifdef CONFIG_REISERFS_CHECK + if (l_n && is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], + n + item_pos - ret_val))) + reiserfs_panic(tb->tb_sb, "PAP-12110: balance_leaf: " + "pasting more than 1 unformatted node pointer into indirect item"); +#endif + + /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ + { + int version; + + version = le16_to_cpu (B_N_PITEM_HEAD (tbS0, 0)->ih_version); + set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), + le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n); + set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n); + } +#if 0 + set_le_key_k_offset (B_PRIGHT_DELIM_KEY(tb->L[0]), le_key_k_offset (B_PRIGHT_DELIM_KEY(tb->L[0])) + l_n); +#endif + /* k_offset (B_N_PKEY (tbS0, 0)) += l_n; + k_offset (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) += l_n; + k_offset (B_PRIGHT_DELIM_KEY(tb->L[0])) += l_n;*/ + +#ifdef NO_CONFIG_REISERFS_CHECK /* journal victim */ + if (!buffer_dirty (tbS0) || !buffer_dirty (tb->CFL[0]) || !buffer_dirty (tb->L[0])) + reiserfs_panic(tb->tb_sb, "PAP-12115: balance_leaf: L, CLF and S must be dirty already"); +#endif + + /* Calculate new body, position in item and insert_size[0] */ + if ( l_n > zeros_num ) { + body += (l_n - zeros_num); + zeros_num = 0; + } + else + zeros_num -= l_n; + pos_in_item = 0; + +#ifdef CONFIG_REISERFS_CHECK + if (comp_short_le_keys (B_N_PKEY(tbS0,0), + B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) || + !op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) || + !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), tbS0->b_size)) + reiserfs_panic (tb->tb_sb, "PAP-12120: balance_leaf: " + "item must be merge-able with left neighboring item"); +#endif + + } + else /* only part of the appended item will be in L[0] */ + { + /* Calculate position in item for append in S[0] */ + pos_in_item -= tb->lbytes; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item <= 0 ) + 
reiserfs_panic(tb->tb_sb, "PAP-12125: balance_leaf: " + "no place for paste. pos_in_item=%d", pos_in_item); +#endif + + /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ + leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } + } + else /* appended item will be in L[0] in whole */ + { + struct item_head * pasted; + +#ifdef REISERFS_FSCK + if ( ! item_pos && is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 ) +#else + if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) +#endif + { /* if we paste into first item of S[0] and it is left mergable */ + /* then increment pos_in_item by the size of the last item in L[0] */ + pasted = B_N_PITEM_HEAD(tb->L[0],n-1); + if ( is_direntry_le_ih (pasted) ) + pos_in_item += le16_to_cpu (pasted->u.ih_entry_count); + else + pos_in_item += le16_to_cpu (pasted->ih_item_len); + } + + /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ + ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0], + body, zeros_num); + + /* if appended item is directory, paste entry */ + pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val); + if (is_direntry_le_ih (pasted)) + leaf_paste_entries ( + bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + /* if appended item is indirect item, put unformatted node into un list */ + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + tb->insert_size[0] = 0; + zeros_num = 0; + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); + } + } else { + /* new item doesn't fall into L[0] */ + leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } /* tb->lnum[0] > 0 */ + + /* Calculate new item position */ + item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 
1 : 0)); + + if ( tb->rnum[0] > 0 ) { + /* shift rnum[0] items from S[0] to the right neighbor R[0] */ + n = B_NR_ITEMS(tbS0); + switch ( flag ) { + + case M_INSERT: /* insert item */ + if ( n - tb->rnum[0] < item_pos ) + { /* new item or its part falls to R[0] */ + if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) + { /* part of new item falls into R[0] */ + int old_key_comp, old_len, r_zeros_number; + const char * r_body; + int version; + loff_t offset; + +#ifdef CONFIG_REISERFS_CHECK + if ( !is_direct_le_ih (ih) ) + reiserfs_panic(tb->tb_sb, "PAP-12135: balance_leaf: " + "only direct item can be split. (%h)", ih); +#endif + + leaf_shift_right(tb,tb->rnum[0]-1,-1); +#if 0/*preserve list*/ + if (tb->rnum[0]>1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + + version = le16_to_cpu (ih->ih_version); + /* Remember key component and item length */ + old_key_comp = le_key_k_offset (version, &(ih->ih_key)); + old_len = le16_to_cpu (ih->ih_item_len); + + /* Calculate key component and item length to insert into R[0] */ + offset = le_key_k_offset (version, &(ih->ih_key)) + (old_len - tb->rbytes); + set_le_key_k_offset (version, &(ih->ih_key), offset); + ih->ih_item_len = cpu_to_le16 (tb->rbytes); + /* Insert part of the item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + if ( offset - old_key_comp > zeros_num ) { + r_zeros_number = 0; + r_body = body + offset - old_key_comp - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - (offset - old_key_comp); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); + + /* Replace right delimiting key by first key in R[0] */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + + /* Calculate key component and item length to insert into S[0] */ + set_le_key_k_offset 
(version, &(ih->ih_key), old_key_comp); + ih->ih_item_len = cpu_to_le16 (old_len - tb->rbytes); + + tb->insert_size[0] -= tb->rbytes; + + } + else /* whole new item falls into R[0] */ + { + /* Shift rnum[0]-1 items to R[0] */ + ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes); +#if 0/*preserve list*/ + if (tb->rnum[0]>1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Insert new item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + /* If we insert new item in the begin of R[0] change the right delimiting key */ + if ( item_pos - n + tb->rnum[0] - 1 == 0 ) { + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + +#if 0 + /* update right delimiting key */ + copy_key(B_PRIGHT_DELIM_KEY(tbS0), &(ih->ih_key)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + zeros_num = tb->insert_size[0] = 0; + } + } + else /* new item or part of it doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + + case M_PASTE: /* append item */ + + if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */ + { + if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 ) + { /* we must shift the part of the appended item */ + if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos))) + { /* we append to directory item */ + int entry_count; + +#ifdef CONFIG_REISERFS_CHECK + if ( zeros_num ) + reiserfs_panic(tb->tb_sb, "PAP-12145: balance_leaf: illegal 
parametr in case of a directory"); +#endif + + entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos)); + if ( entry_count - tb->rbytes < pos_in_item ) + /* new directory entry falls into R[0] */ + { + int paste_entry_position; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rbytes - 1 >= entry_count || ! tb->insert_size[0] ) + reiserfs_panic(tb->tb_sb, "PAP-12150: balance_leaf: " + "no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", + tb->rbytes, entry_count); +#endif + + /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ + leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1); +#if 0/*preserve list*/ + /* if we are shifting more than just the new entry */ + if (tb->rbytes > 1 || tb->rnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Paste given directory entry to directory item */ + paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, 0, paste_entry_position, + tb->insert_size[0],body,zeros_num); + /* paste entry */ + leaf_paste_entries ( + bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + + if ( paste_entry_position == 0 ) { + /* change delimiting keys */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); +#if 0 + copy_key(B_PRIGHT_DELIM_KEY(tbS0), B_N_PKEY(tb->R[0], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + + tb->insert_size[0] = 0; + pos_in_item++; + } + else /* new directory entry doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } + else /* 
regular object */ + { + int n_shift, n_rem, r_zeros_number; + const char * r_body; + + /* Calculate number of bytes which must be shifted from appended item */ + if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 ) + n_shift = 0; + +#ifdef CONFIG_REISERFS_CHECK + if (pos_in_item != B_N_PITEM_HEAD (tbS0, item_pos)->ih_item_len) + reiserfs_panic(tb->tb_sb,"PAP-12155: balance_leaf: invalid position to paste. ih_item_len=%d, pos_in_item=%d", + pos_in_item, B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len); +#endif + + leaf_shift_right(tb,tb->rnum[0],n_shift); +#if 0/*preserve list*/ + /* if we are shifting an old part from the appended item or more than the appended item is going into R */ + if (n_shift || tb->rnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Calculate number of bytes which must remain in body after appending to R[0] */ + if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 ) + n_rem = 0; + + { + int version; + + version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); + set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), + le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + n_rem); + set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + n_rem); + } +/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; + k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ + do_balance_mark_internal_dirty (tb, tb->CFR[0], 0); + +#if 0 + set_le_key_k_offset (B_PRIGHT_DELIM_KEY(tbS0), le_key_k_offset (B_PRIGHT_DELIM_KEY(tbS0)) + n_rem); +/* k_offset (B_PRIGHT_DELIM_KEY(tbS0)) += n_rem;*/ + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + /* Append part of body into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + if ( n_rem > zeros_num ) { + r_zeros_number = 0; + r_body = body + n_rem - zeros_num; + } + 
else { + r_body = body; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; + } + + leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); + + if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { + +#ifdef CONFIG_REISERFS_CHECK + if (n_rem) + reiserfs_panic(tb->tb_sb, "PAP-12160: balance_leaf: paste more than one unformatted node pointer"); +#endif + + set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + + tb->insert_size[0] = n_rem; + if ( ! n_rem ) + pos_in_item ++; + } + } + else /* pasted item in whole falls into R[0] */ + { + struct item_head * pasted; + + ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* append item in R[0] */ + if ( pos_in_item >= 0 ) { + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item, + tb->insert_size[0],body, zeros_num); + } + + /* paste new entry, if item is directory item */ + pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); + if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) { + leaf_paste_entries ( + bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + if ( ! 
pos_in_item ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( item_pos - n + tb->rnum[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12165: balance_leaf: " + "directory item must be first item of node when pasting is in 0th position"); +#endif + + /* update delimiting keys */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); +#if 0 + copy_key(B_PRIGHT_DELIM_KEY(tbS0),B_N_PKEY(tb->R[0], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + } + + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + zeros_num = tb->insert_size[0] = 0; + } + } + else /* new item doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); + } + + } /* tb->rnum[0] > 0 */ + + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->blknum[0] > 3 ) + reiserfs_panic (tb->tb_sb, "PAP-12180: balance_leaf: blknum can not be %d. It must be <= 3", tb->blknum[0]); + + if ( tb->blknum[0] < 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12185: balance_leaf: blknum can not be %d. It must be >= 0", tb->blknum[0]); +#endif + + /* if while adding to a node we discover that it is possible to split + it in two, and merge the left part into the left neighbor and the + right part into the right neighbor, eliminating the node */ + if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->lnum[0] || ! 
tb->rnum[0] ) + reiserfs_panic(tb->tb_sb, "PAP-12190: balance_leaf: lnum and rnum must not be zero"); +#if 0 + if (COMP_KEYS (B_N_PKEY(tb->R[0], 0), B_PRIGHT_DELIM_KEY(tbS0))) + reiserfs_panic (tb->tb_sb, "vs-12192: balance_leaf: S[0] is being removed from the tree, it has incorrect right delimiting key"); +#endif +#endif + +#if 0 + /* if insertion was done before 0-th position in R[0], right + delimiting key of the tb->L[0]'s and left delimiting key are + not set correctly */ + if (tb->L[0]) { + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); + } + + if (tb->CFL[0]) { + copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->CFL[0], 0); + } +#endif + + /* if insertion was done before 0-th position in R[0], right + delimiting key of the tb->L[0]'s and left delimiting key are + not set correctly */ + if (tb->CFL[0]) { + if (!tb->CFR[0]) + reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized"); + copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0])); + do_balance_mark_internal_dirty (tb, tb->CFL[0], 0); + } + + reiserfs_invalidate_buffer(tb,tbS0); + return 0; + } + + + /* Fill new nodes that appear in place of S[0] */ + + /* I am told that this copying is because we need an array to enable + the looping code. -Hans */ + snum[0] = tb->s1num, + snum[1] = tb->s2num; + sbytes[0] = tb->s1bytes; + sbytes[1] = tb->s2bytes; + for( i = tb->blknum[0] - 2; i >= 0; i-- ) { + +#ifdef CONFIG_REISERFS_CHECK + if (!snum[i]) + reiserfs_panic(tb->tb_sb,"PAP-12200: balance_leaf: snum[%d] == %d. 
Must be > 0", i, snum[i]); +#endif /* CONFIG_REISERFS_CHECK */ + + /* here we shift from S to S_new nodes */ + + S_new[i] = get_FEB(tb); + + /* initialized block type and tree level */ + B_BLK_HEAD(S_new[i])->blk_level = cpu_to_le16 (DISK_LEAF_NODE_LEVEL); + + + n = B_NR_ITEMS(tbS0); + + switch (flag) { + case M_INSERT: /* insert item */ + + if ( n - snum[i] < item_pos ) + { /* new item or it's part falls to first new node S_new[i]*/ + if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 ) + { /* part of new item falls into S_new[i] */ + int old_key_comp, old_len, r_zeros_number; + const char * r_body; + int version; + +#ifdef CONFIG_REISERFS_CHECK + if ( !is_direct_le_ih(ih) ) + /* The items which can be inserted are: + Stat_data item, direct item, indirect item and directory item which consist of only two entries "." and "..". + These items must not be broken except for a direct one. */ + reiserfs_panic(tb->tb_sb, "PAP-12205: balance_leaf: " + "non-direct item can not be broken when inserting"); +#endif + + /* Move snum[i]-1 items from S[0] to S_new[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]); +#if 0/*preserve list*/ + if (snum[i] > 1 ) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Remember key component and item length */ + version = ih_version (ih); + old_key_comp = le_key_k_offset (version, &(ih->ih_key)); + old_len = le16_to_cpu (ih->ih_item_len); + + /* Calculate key component and item length to insert into S_new[i] */ + set_le_key_k_offset (version, &(ih->ih_key), + le_key_k_offset (version, &(ih->ih_key)) + (old_len - sbytes[i])); + + ih->ih_item_len = cpu_to_le16 (sbytes[i]); + + /* Insert part of the item into S_new[i] before 0-th item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + + if ( le_key_k_offset (version, &(ih->ih_key)) - old_key_comp > zeros_num ) { + r_zeros_number = 0; 
+ r_body = body + (le_key_k_offset (version, &(ih->ih_key)) - old_key_comp) - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - (le_key_k_offset (version, &(ih->ih_key)) - old_key_comp); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); + + /* Calculate key component and item length to insert into S[i] */ + set_le_key_k_offset (version, &(ih->ih_key), old_key_comp); + ih->ih_item_len = cpu_to_le16 (old_len - sbytes[i]); + tb->insert_size[0] -= sbytes[i]; + } + else /* whole new item falls into S_new[i] */ + { + /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]); + + /* Insert new item into S_new[i] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + zeros_num = tb->insert_size[0] = 0; + } + } + + else /* new item or it part don't falls into S_new[i] */ + { + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + + case M_PASTE: /* append item */ + + if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */ + { + if ( item_pos == n - snum[i] && sbytes[i] != -1 ) + { /* we must shift part of the appended item */ + struct item_head * aux_ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( ih ) + reiserfs_panic (tb->tb_sb, "PAP-12210: balance_leaf: ih must be 0"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) { + /* we append to directory item */ + + int entry_count; + + entry_count = 
le16_to_cpu (aux_ih->u.ih_entry_count); + + if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) { + /* new directory entry falls into S_new[i] */ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->insert_size[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12215: balance_leaif: insert_size is already 0"); + if ( sbytes[i] - 1 >= entry_count ) + reiserfs_panic (tb->tb_sb, "PAP-12220: balance_leaf: " + "there are no so much entries (%d), only %d", + sbytes[i] - 1, entry_count); +#endif + + /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]); +#if 0/*preserve list*/ + /* if more than the affected item is shifted, or if more than + one entry (from the affected item) is shifted */ + if (snum[i] > 1 || sbytes[i] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Paste given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, + tb->insert_size[0], body,zeros_num); + /* paste new directory entry */ + leaf_paste_entries ( + bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1, + 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, + tb->insert_size[0] + ); + tb->insert_size[0] = 0; + pos_in_item++; + } else { /* new directory entry doesn't fall into S_new[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); + } + } + else /* regular object */ + { + int n_shift, n_rem, r_zeros_number; + const char * r_body; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item != B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len || + tb->insert_size[0] <= 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12225: balance_leaf: item too short or insert_size <= 0"); +#endif + + /* Calculate number of bytes 
which must be shifted from appended item */ + n_shift = sbytes[i] - tb->insert_size[0]; + if ( n_shift < 0 ) + n_shift = 0; + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); + + /* Calculate number of bytes which must remain in body after append to S_new[i] */ + n_rem = tb->insert_size[0] - sbytes[i]; + if ( n_rem < 0 ) + n_rem = 0; + /* Append part of body into S_new[0] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + + if ( n_rem > zeros_num ) { + r_zeros_number = 0; + r_body = body + n_rem - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; + } + + leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number); + { + struct item_head * tmp; + + tmp = B_N_PITEM_HEAD(S_new[i],0); + if (is_indirect_le_ih (tmp)) { + if (n_rem) + reiserfs_panic (tb->tb_sb, "PAP-12230: balance_leaf: invalid action with indirect item"); + set_ih_free_space (tmp, ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + set_le_key_k_offset (ih_version (tmp), &tmp->ih_key, + le_key_k_offset (ih_version (tmp), &tmp->ih_key) + n_rem); + } + + tb->insert_size[0] = n_rem; + if ( ! n_rem ) + pos_in_item++; + } + } + else + /* item falls wholly into S_new[i] */ + { + int ret_val; + struct item_head * pasted; + +#ifdef CONFIG_REISERFS_CHECK + struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos); + + if ( ! 
is_direntry_le_ih(ih) && (pos_in_item != ih->ih_item_len || + tb->insert_size[0] <= 0) ) + reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); +#endif /* CONFIG_REISERFS_CHECK */ + + ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + /* we must preserve that which we are pasting onto the end of and shifting */ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( ret_val ) + reiserfs_panic (tb->tb_sb, "PAP-12240: balance_leaf: " + "unexpected value returned by leaf_move_items (%d)", + ret_val); +#endif /* CONFIG_REISERFS_CHECK */ + + /* paste into item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num); + + pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]); + if (is_direntry_le_ih (pasted)) + { + leaf_paste_entries ( + bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + } + + /* if we paste to indirect item update ih_free_space */ + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + zeros_num = tb->insert_size[0] = 0; + } + } + + else /* pasted item doesn't fall into S_new[i] */ + { + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? 
"CUT" : "UNKNOWN"), flag); + } + + memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE); + insert_ptr[i] = S_new[i]; + +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&(S_new[i]->b_count)) != 1) { + if (atomic_read(&(S_new[i]->b_count)) != 2 || + !(buffer_journaled(S_new[i]) || buffer_journal_dirty(S_new[i]))) { + reiserfs_panic (tb->tb_sb, "PAP-12247: balance_leaf: S_new[%d] : (%b)\n", i, S_new[i]); + } + } +#endif + +#if 0 + /* update right_delimiting_key fields */ + copy_key (B_PRIGHT_DELIM_KEY (S_new[i]), B_PRIGHT_DELIM_KEY (tbS0)); + copy_key (B_PRIGHT_DELIM_KEY (tbS0), B_N_PKEY (S_new[i], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + + } + + /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the + affected item which remains in S */ + if ( 0 <= item_pos && item_pos < tb->s0num ) + { /* if we must insert or append into buffer S[0] */ + + switch (flag) + { + case M_INSERT: /* insert item into S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + /* If we insert the first key change the delimiting key */ + if( item_pos == 0 ) { + if (tb->CFL[0]) /* can be 0 in reiserfsck */ + replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); + +#if 0 /* right delim key support */ +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->CFL[0] || ! 
tb->L[0] || (B_NR_ITEMS (tbS0) > 1 && + COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 1))) ) + reiserfs_panic(tb->tb_sb, "PAP-12250: balance_leaf: invalid right delimiting key"); + if (!buffer_dirty (tb->L[0]) && !(buffer_journaled(tb->L[0]) || + buffer_journal_dirty(tb->L[0]))) + reiserfs_panic (tb->tb_sb, "PAP-12255: balance_leaf: tb->L[0] must be dirty"); +#endif + if (tb->L[0]) /* can be 0 in reiserfsck */ + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), &(ih->ih_key)); +#endif /* right delim key support */ + } + break; + + case M_PASTE: { /* append item in S[0] */ + struct item_head * pasted; + + pasted = B_N_PITEM_HEAD (tbS0, item_pos); + /* when directory, may be new entry already pasted */ + if (is_direntry_le_ih (pasted)) { + if ( pos_in_item >= 0 && pos_in_item <= le16_to_cpu (pasted->u.ih_entry_count) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->insert_size[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12260: balance_leaf: insert_size is 0 already"); +#endif /* CONFIG_REISERFS_CHECK */ + + /* prepare space */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); + + +#ifdef CONFIG_REISERFS_CHECK +#if 0 + if ( ! item_pos && ! pos_in_item && (! tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), + B_N_PKEY(tbS0, 0))) ) + reiserfs_panic(tb->tb_sb, "PAP-12265: balance_leaf: invalid right delimiting key"); +#endif +#endif + + /* paste entry */ + leaf_paste_entries ( + bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + if ( ! item_pos && ! 
pos_in_item ) { + +#ifdef CONFIG_REISERFS_CHECK + if (!tb->CFL[0] || !tb->L[0]) + reiserfs_panic (tb->tb_sb, "PAP-12270: balance_leaf: CFL[0]/L[0] must be specified"); +#endif /* CONFIG_REISERFS_CHECK */ + + if (tb->CFL[0]) { + replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); + +#if 0 + /* update right delimiting key */ + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), B_N_PKEY(tbS0, 0)); + /* probably not needed as something has been shifted to tb->L[0] already */ + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } + tb->insert_size[0] = 0; + } + } else { /* regular object */ + if ( pos_in_item == pasted->ih_item_len ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] <= 0 ) + reiserfs_panic (tb->tb_sb, + "PAP-12275: balance_leaf: insert size must not be %d", tb->insert_size[0]); +#endif /* CONFIG_REISERFS_CHECK */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); + + if (is_indirect_le_ih (pasted)) { + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] != UNFM_P_SIZE ) + reiserfs_panic (tb->tb_sb, + "PAP-12280: balance_leaf: insert_size for indirect item must be %d, not %d", + UNFM_P_SIZE, tb->insert_size[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + tb->insert_size[0] = 0; + } + +#ifdef CONFIG_REISERFS_CHECK + else { + if ( tb->insert_size[0] ) { + print_cur_tb ("12285"); + reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]); + } + } +#endif /* CONFIG_REISERFS_CHECK */ + + } + } /* case M_PASTE: */ + } + } + +#ifdef CONFIG_REISERFS_CHECK + if ( flag == M_PASTE && tb->insert_size[0] ) { + print_cur_tb ("12290"); + reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]); + } +#endif /* 
CONFIG_REISERFS_CHECK */ + + return 0; +} /* Leaf level of the tree is balanced (end of balance_leaf) */ + + + +/* Make empty node */ +void make_empty_node (struct buffer_info * bi) +{ + struct block_head * blkh; + +#ifdef CONFIG_REISERFS_CHECK + if (bi->bi_bh == NULL) + reiserfs_panic (0, "PAP-12295: make_empty_node: pointer to the buffer is NULL"); +#endif + + (blkh = B_BLK_HEAD(bi->bi_bh))->blk_nr_item = cpu_to_le16 (0); + blkh->blk_free_space = cpu_to_le16 (MAX_CHILD_SIZE(bi->bi_bh)); + + if (bi->bi_parent) + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; +} + + +/* Get first empty buffer */ +struct buffer_head * get_FEB (struct tree_balance * tb) +{ + int i; + struct buffer_head * first_b; + struct buffer_info bi; + + for (i = 0; i < MAX_FEB_SIZE; i ++) + if (tb->FEB[i] != 0) + break; + + if (i == MAX_FEB_SIZE) + reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty"); + + bi.tb = tb; + bi.bi_bh = first_b = tb->FEB[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + make_empty_node (&bi); + set_bit(BH_Uptodate, &first_b->b_state); + tb->FEB[i] = 0; + tb->used[i] = first_b; + +#ifdef REISERFS_FSCK + mark_block_formatted (first_b->b_blocknr); +#endif + + return(first_b); +} + + +/* This is now used because reiserfs_free_block has to be able to +** schedule. 
+*/ +static void store_thrown (struct tree_balance * tb, struct buffer_head * bh) +{ + int i; + + if (buffer_dirty (bh)) + printk ("store_thrown deals with dirty buffer\n"); + for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++) + if (!tb->thrown[i]) { + tb->thrown[i] = bh; + atomic_inc(&bh->b_count) ; /* decremented in free_thrown */ + return; + } + reiserfs_warning ("store_thrown: too many thrown buffers\n"); +} + +static void free_thrown(struct tree_balance *tb) { + int i ; + unsigned long blocknr ; + for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) { + if (tb->thrown[i]) { + blocknr = tb->thrown[i]->b_blocknr ; + if (buffer_dirty (tb->thrown[i])) + printk ("free_thrown deals with dirty buffer %ld\n", blocknr); + brelse(tb->thrown[i]) ; /* incremented in store_thrown */ + reiserfs_free_block (tb->transaction_handle, blocknr); + } + } +} + +void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh) +{ + B_BLK_HEAD (bh)->blk_level = cpu_to_le16 (FREE_LEVEL)/*0*/; + B_BLK_HEAD (bh)->blk_nr_item = cpu_to_le16 (0); + mark_buffer_clean (bh); + /* reiserfs_free_block is no longer schedule safe + reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr); + */ + + store_thrown (tb, bh); +#if 0 +#ifdef REISERFS_FSCK + { + struct buffer_head * to_be_forgotten; + + to_be_forgotten = find_buffer (bh->b_dev, bh->b_blocknr, bh->b_size); + if (to_be_forgotten) { + to_be_forgotten->b_count ++; + bforget (to_be_forgotten); + } + unmark_block_formatted (bh->b_blocknr); + } +#endif +#endif +} + +/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ +void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest, + struct buffer_head * src, int n_src) +{ + +#ifdef CONFIG_REISERFS_CHECK + if (dest == NULL || src == NULL) + reiserfs_panic (0, "vs-12305: replace_key: sourse or destination buffer is 0 (src=%p, dest=%p)", src, dest); + + if ( ! 
B_IS_KEYS_LEVEL (dest) ) + reiserfs_panic (0, "vs-12310: replace_key: invalid level (%z) for destination buffer. dest must be leaf", + dest); + + if (n_dest < 0 || n_src < 0) + reiserfs_panic (0, "vs-12315: replace_key: src(%d) or dest(%d) key number less than 0", n_src, n_dest); + + if (n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src)) + reiserfs_panic (0, "vs-12320: replace_key: src(%d(%d)) or dest(%d(%d)) key number is too big", + n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); +#endif /* CONFIG_REISERFS_CHECK */ + + if (B_IS_ITEMS_LEVEL (src)) + /* source buffer contains leaf node */ + memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE); + else + memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE); + + do_balance_mark_internal_dirty (tb, dest, 0); +} + + +int get_left_neighbor_position ( + struct tree_balance * tb, + int h + ) +{ + int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if (PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0) + reiserfs_panic (tb->tb_sb, "vs-12325: get_left_neighbor_position: FL[%d](%p) or F[%d](%p) does not exist", + h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h)); +#endif + + if (Sh_position == 0) + return B_NR_ITEMS (tb->FL[h]); + else + return Sh_position - 1; +} + + +int get_right_neighbor_position (struct tree_balance * tb, int h) +{ + int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if (PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0) + reiserfs_panic (tb->tb_sb, "vs-12330: get_right_neighbor_position: F[%d](%p) or FR[%d](%p) does not exist", + h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]); +#endif + + if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h))) + return 0; + else + return Sh_position + 1; +} + + +#ifdef CONFIG_REISERFS_CHECK + +int is_reusable (struct super_block * s, unsigned long block, int bit_value); +static void check_internal_node (struct super_block * 
s, struct buffer_head * bh, char * mes) +{ + struct disk_child * dc; + int i; + + if (!bh) + reiserfs_panic (s, "PAP-12336: check_internal_node: bh == 0"); + + if (!bh || !B_IS_IN_TREE (bh)) + return; + + if (!buffer_dirty (bh) && + !(buffer_journaled(bh) || buffer_journal_dirty(bh))) { + reiserfs_panic (s, "PAP-12337: check_internal_node: buffer (%b) must be dirty", bh); + } + + dc = B_N_CHILD (bh, 0); + + for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) { + if (!is_reusable (s, dc->dc_block_number, 1) ) { + print_cur_tb (mes); + reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh); + } + } +} + + +static int locked_or_not_in_tree (struct buffer_head * bh, char * which) +{ + if ( buffer_locked (bh) || !B_IS_IN_TREE (bh) ) { + reiserfs_warning ("vs-12339: locked_or_not_in_tree: %s (%b)\n", which, bh); + return 1; + } + return 0; +} + + +static int check_before_balancing (struct tree_balance * tb) +{ + int retval = 0; + + if ( cur_tb ) { + reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: " + "suspect that schedule occurred based on cur_tb not being null at this point in code. " + "do_balance cannot properly handle schedule occuring while it runs."); + } + + /* double check that buffers that we will modify are unlocked. (fix_nodes should already have + prepped all of these for us). 
*/ + if ( tb->lnum[0] ) { + retval |= locked_or_not_in_tree (tb->L[0], "L[0]"); + retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]"); + retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]"); + check_leaf (tb->L[0]); + } + if ( tb->rnum[0] ) { + retval |= locked_or_not_in_tree (tb->R[0], "R[0]"); + retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]"); + retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]"); + check_leaf (tb->R[0]); + } + retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]"); + check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); + + return retval; +} + + +void check_after_balance_leaf (struct tree_balance * tb) +{ + if (tb->lnum[0]) { + if (B_FREE_SPACE (tb->L[0]) != + MAX_CHILD_SIZE (tb->L[0]) - B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0))->dc_size) { + print_cur_tb ("12221"); + reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); + } + } + if (tb->rnum[0]) { + if (B_FREE_SPACE (tb->R[0]) != + MAX_CHILD_SIZE (tb->R[0]) - B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0))->dc_size) { + print_cur_tb ("12222"); + reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); + } + } + if (PATH_H_PBUFFER(tb->tb_path,1) && (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != + (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - + B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), + PATH_H_POSITION (tb->tb_path, 1))->dc_size))) { + print_cur_tb ("12223"); + reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect"); + } +} + + +void check_leaf_level (struct tree_balance * tb) +{ + check_leaf (tb->L[0]); + check_leaf (tb->R[0]); + check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); +} + +void check_internal_levels (struct tree_balance * tb) +{ + int h; + + /* check all internal nodes */ + for (h = 1; tb->insert_size[h]; h ++) { + check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH"); + if (tb->lnum[h]) + 
check_internal_node (tb->tb_sb, tb->L[h], "BAD L"); + if (tb->rnum[h]) + check_internal_node (tb->tb_sb, tb->R[h], "BAD R"); + } + +} + +#endif + + + + + + +/* Now we have all of the buffers that must be used in balancing of + the tree. We rely on the assumption that schedule() will not occur + while do_balance works. ( Only interrupt handlers are acceptable.) + We balance the tree according to the analysis made before this, + using buffers already obtained. For SMP support it will someday be + necessary to add ordered locking of tb. */ + +/* Some interesting rules of balancing: + + we delete a maximum of two nodes per level per balancing: we never + delete R, when we delete two of three nodes L, S, R then we move + them into R. + + we only delete L if we are deleting two nodes, if we delete only + one node we delete S + + if we shift leaves then we shift as much as we can: this is a + deliberate policy of extremism in node packing which results in + higher average utilization after repeated random balance operations + at the cost of more memory copies and more balancing as a result of + small insertions to full nodes. + + if we shift internal nodes we try to evenly balance the node + utilization, with consequent less balancing at the cost of lower + utilization. + + one could argue that the policy for directories in leaves should be + that of internal nodes, but we will wait until another day to + evaluate this.... It would be nice to someday measure and prove + these assumptions as to what is optimal.... 
+ +*/ + +static inline void do_balance_starts (struct tree_balance *tb) +{ + /* use print_cur_tb() to see initial state of struct + tree_balance */ + + /* store_print_tb (tb); */ + +#ifdef CONFIG_REISERFS_CHECK + + /* do not delete, just comment it out */ +/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, + "check");*/ + + if (check_before_balancing (tb)) + reiserfs_panic (tb->tb_sb, "PAP-12340: do_balance: locked buffers in TB"); + +#ifndef __KERNEL__ + if ( atomic_read(&(PATH_PLAST_BUFFER(tb->tb_path)->b_count)) > 1 || (tb->L[0] && atomic_read(&(tb->L[0]->b_count)) > 1) || + (tb->R[0] && atomic_read(&(tb->R[0]->b_count)) > 1) ) { + print_cur_tb ("first three parameters are invalid"); + reiserfs_panic (tb->tb_sb, "PAP-12345: do_balance: counter too big"); + } +#endif /* !__KERNEL__ */ + cur_tb = tb; + +#endif /* CONFIG_REISERFS_CHECK */ +} + + +static inline void do_balance_completed (struct tree_balance * tb) +{ + +#ifdef CONFIG_REISERFS_CHECK + check_leaf_level (tb); + check_internal_levels (tb); + cur_tb = NULL; +#endif + + /* reiserfs_free_block is no longer schedule safe. So, we need to + ** put the buffers we want freed on the thrown list during do_balance, + ** and then free them now + */ + + tb->tb_sb->u.reiserfs_sb.s_do_balance ++; + + + /* release all nodes hold to perform the balancing */ + unfix_nodes(tb); + + free_thrown(tb) ; +} + + + + + +void do_balance (struct tree_balance * tb, /* tree_balance structure */ + struct item_head * ih, /* item header of inserted item */ + const char * body, /* body of inserted item or bytes to paste */ + int flag) /* i - insert, d - delete + c - cut, p - paste + + Cut means delete part of an item + (includes removing an entry from a + directory). + + Delete means delete whole item. + + Insert means add a new item into the + tree. + + Paste means to append to the end of an + existing file or to insert a directory + entry. 
*/ +{ + int child_pos, /* position of a child node in its parent */ + h; /* level of the tree being processed */ + struct item_head insert_key[2]; /* in our processing of one level + we sometimes determine what + must be inserted into the next + higher level. This insertion + consists of a key or two keys + and their corresponding + pointers */ + struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next + level */ + + tb->tb_mode = flag; + tb->need_balance_dirty = 0; + + if (FILESYSTEM_CHANGED_TB(tb)) { + reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ; + } + /* if we have no real work to do */ + if ( ! tb->insert_size[0] ) { + reiserfs_warning ("PAP-12350: do_balance: insert_size == 0, mode == %c", + flag); + unfix_nodes(tb); + return; + } + + atomic_inc (&(fs_generation (tb->tb_sb))); + do_balance_starts (tb); + +#ifdef REISERFS_FSCK + if (flag == M_INTERNAL) { + insert_ptr[0] = (struct buffer_head *)body; + /* we must prepare insert_key */ + + if (PATH_H_B_ITEM_ORDER (tb->tb_path, 0)/*LAST_POSITION (tb->tb_path)*//*item_pos*/ == -1) { + /* get delimiting key from buffer in tree */ + copy_key (&insert_key[0].ih_key, B_N_PKEY (PATH_PLAST_BUFFER (tb->tb_path), 0)); + /*insert_ptr[0]->b_item_order = 0;*/ + } else { + /* get delimiting key from new buffer */ + copy_key (&insert_key[0].ih_key, B_N_PKEY((struct buffer_head *)body,0)); + /*insert_ptr[0]->b_item_order = item_pos;*/ + } + + /* and insert_ptr instead of balance_leaf */ + child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0)/*item_pos*/; + } else +#endif + + /* balance leaf returns 0 except if combining L R and S into + one node. see balance_internal() for explanation of this + line of code.*/ + child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) + + balance_leaf (tb, ih, body, flag, insert_key, insert_ptr); + +#ifdef CONFIG_REISERFS_CHECK + check_after_balance_leaf (tb); +#endif + + /* Balance internal level of the tree. 
*/ + for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ ) + child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr); + + + do_balance_completed (tb); + +} diff -u -r --new-file linux/fs/reiserfs/file.c v2.4.0-test8/linux/fs/reiserfs/file.c --- linux/fs/reiserfs/file.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/file.c Mon Sep 11 05:21:49 2000 @@ -0,0 +1,125 @@ +/* + * Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + +/* +** We pack the tails of files on file close, not at the time they are written. +** This implies an unnecessary copy of the tail and an unnecessary indirect item +** insertion/balancing, for files that are written in one write. +** It avoids unnecessary tail packings (balances) for files that are written in +** multiple writes and are small enough to have tails. +** +** file_release is called by the VFS layer when the file is closed. If +** this is the last open file descriptor, and the file +** small enough to have a tail, and the tail is currently in an +** unformatted node, the tail is converted back into a direct item. +** +** Since reiserfs_file_truncate involves the same checks and conversions +** we just call truncate on the file without changing the file size. +** The file is not truncated at all. 
+*/ +static int reiserfs_file_release (struct inode * inode, struct file * filp) +{ + + struct reiserfs_transaction_handle th ; + int windex ; + + if (!S_ISREG (inode->i_mode)) + BUG (); + + /* fast out for when nothing needs to be done */ + if ((atomic_read(&inode->i_count) > 1 || + !inode->u.reiserfs_i.i_pack_on_close || + !tail_has_to_be_packed(inode)) && + inode->u.reiserfs_i.i_prealloc_count <= 0) { + return 0; + } + + lock_kernel() ; + down (&inode->i_sem); + journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ; + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ; + + if (atomic_read(&inode->i_count) <= 1 && + inode->u.reiserfs_i.i_pack_on_close && + tail_has_to_be_packed (inode)) { + /* if regular file is released by last holder and it has been + appended (we append by unformatted node only) or its direct + item(s) had to be converted, then it may have to be + indirect2direct converted */ + windex = push_journal_writer("file_release") ; + reiserfs_truncate_file(inode) ; + pop_journal_writer(windex) ; + } + up (&inode->i_sem); + unlock_kernel() ; + return 0; +} + + +/* Sync a reiserfs file. 
*/ +static int reiserfs_sync_file( + struct file * p_s_filp, + struct dentry * p_s_dentry, + int datasync + ) { + struct inode * p_s_inode = p_s_dentry->d_inode; + struct reiserfs_transaction_handle th ; + int n_err = 0; + int windex ; + int jbegin_count = 1 ; + + lock_kernel() ; + + if (!S_ISREG(p_s_inode->i_mode)) + BUG (); + + /* step one, flush all dirty buffers in the file's page map to disk */ + n_err = generic_buffer_fdatasync(p_s_inode, 0, ~0UL) ; + + /* step two, commit the current transaction to flush any metadata + ** changes + */ + journal_begin(&th, p_s_inode->i_sb, jbegin_count) ; + windex = push_journal_writer("sync_file") ; + reiserfs_update_sd(&th, p_s_inode); + pop_journal_writer(windex) ; + journal_end_sync(&th, p_s_inode->i_sb,jbegin_count) ; + unlock_kernel() ; + return ( n_err < 0 ) ? -EIO : 0; +} + + + +struct file_operations reiserfs_file_operations = { + read: generic_file_read, + write: generic_file_write, + mmap: generic_file_mmap, + release: reiserfs_file_release, + fsync: reiserfs_sync_file, + ioctl: reiserfs_ioctl, +}; + + +struct inode_operations reiserfs_file_inode_operations = { + truncate: reiserfs_truncate_file, +}; + + diff -u -r --new-file linux/fs/reiserfs/fix_node.c v2.4.0-test8/linux/fs/reiserfs/fix_node.c --- linux/fs/reiserfs/fix_node.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/fix_node.c Sat Aug 12 01:46:36 2000 @@ -0,0 +1,2897 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +/** + ** old_item_num + ** old_entry_num + ** set_entry_sizes + ** create_virtual_node + ** check_left + ** check_right + ** directory_part_size + ** get_num_ver + ** set_parameters + ** is_leaf_removable + ** are_leaves_removable + ** get_empty_nodes + ** get_lfree + ** get_rfree + ** is_left_neighbor_in_cache + ** decrement_key + ** get_far_parent + ** get_parents + ** can_node_be_removed + ** ip_check_balance + ** dc_check_balance_internal + ** dc_check_balance_leaf 
+ ** dc_check_balance + ** check_balance + ** get_direct_parent + ** get_neighbors + ** fix_nodes + ** + ** + **/ + + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + + + +/* To make any changes in the tree we find a node, that contains item + to be changed/deleted or position in the node we insert a new item + to. We call this node S. To do balancing we need to decide what we + will shift to left/right neighbor, or to a new node, where new item + will be etc. To make this analysis simpler we build virtual + node. Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. 
*/ + + +/* taking item number in virtual node, returns number of item, that it has in source buffer */ +static inline int old_item_num (int new_num, int affected_item_num, int mode) +{ + if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) + return new_num; + + if (mode == M_INSERT) { + +#ifdef CONFIG_REISERFS_CHECK + if (new_num == 0) + reiserfs_panic (0,"vs-8005: old_item_num: for INSERT mode and item number of inserted item"); +#endif + + return new_num - 1; + } + +#ifdef CONFIG_REISERFS_CHECK + if (mode != M_DELETE) + reiserfs_panic (0, "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode); +#endif + + /* delete mode */ + return new_num + 1; +} + +static void create_virtual_node (struct tree_balance * tb, int h) +{ + struct item_head * ih; + struct virtual_node * vn = tb->tb_vn; + int new_num; + struct buffer_head * Sh; /* this comes from tb->S[h] */ + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + + /* size of changed node */ + vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h]; + + /* for internal nodes array if virtual items is not created */ + if (h) { + vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); + return; + } + + /* number of items in virtual node */ + vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 
1 : 0); + + /* first virtual item */ + vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); + memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item)); + vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item); + + + /* first item in the node */ + ih = B_N_PITEM_HEAD (Sh, 0); + + /* define the mergeability for 0-th item (if it is not being deleted) */ +#ifdef REISERFS_FSCK + if (is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) +#else + if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) +#endif + vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; + + /* go through all items those remain in the virtual node (except for the new (inserted) one) */ + for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) { + int j; + struct virtual_item * vi = vn->vn_vi + new_num; + int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1); + + + if (is_affected && vn->vn_mode == M_INSERT) + continue; + + /* get item number in source node */ + j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode); + + vi->vi_item_len += ih[j].ih_item_len + IH_SIZE; + vi->vi_ih = ih + j; + vi->vi_item = B_I_PITEM (Sh, ih + j); + vi->vi_uarea = vn->vn_free_ptr; + + // FIXME: there is no check, that item operation did not + // consume too much memory + vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]); + if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) + reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: " + "virtual node space consumed"); + + if (!is_affected) + /* this is not being changed */ + continue; + + if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { + vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; + vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted + } + } + + + /* virtual inserted item is not defined yet */ + if (vn->vn_mode == M_INSERT) { + struct virtual_item * vi = 
vn->vn_vi + vn->vn_affected_item_num; + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_ins_ih == 0) + reiserfs_panic (0, "vs-8040: create_virtual_node: item header of inserted item is not specified"); +#endif + + vi->vi_item_len = tb->insert_size[0]; + vi->vi_ih = vn->vn_ins_ih; + vi->vi_item = vn->vn_data; + vi->vi_uarea = vn->vn_free_ptr; + + op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]); +#if 0 + switch (type/*le_key_k_type (ih_version (vn->vn_ins_ih), &(vn->vn_ins_ih->ih_key))*/) { + case TYPE_STAT_DATA: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_STAT_DATA; + break; + case TYPE_DIRECT: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_DIRECT; + break; + case TYPE_INDIRECT: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_INDIRECT; + break; + default: + /* inseted item is directory (it must be item with "." and "..") */ + vn->vn_vi[vn->vn_affected_item_num].vi_type |= + (VI_TYPE_DIRECTORY | VI_TYPE_FIRST_DIRECTORY_ITEM | VI_TYPE_INSERTED_DIRECTORY_ITEM); + + /* this directory item can not be split, so do not set sizes of entries */ + break; + } +#endif + } + + /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ + if (tb->CFR[0]) { + struct key * key; + + key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]); +#ifdef REISERFS_FSCK + if (is_right_mergeable (tb->tb_sb, tb->tb_path) == 1 && (vn->vn_mode != M_DELETE || + vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1)) +#else + if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE || + vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1)) +#endif + vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE; + +#ifdef CONFIG_REISERFS_CHECK + if (op_is_left_mergeable (key, Sh->b_size) && + !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) { + /* we delete last item and it could be merged with right neighbor's first item */ + if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD 
(Sh, 0)) && + I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) { + /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ + print_block (Sh, 0, -1, -1); + reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", + key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); + } else + /* we can delete directory item, that has only one directory entry in it */ + ; + } +#endif + + } +} + + +/* using virtual node check, how many items can be shifted to left + neighbor */ +static void check_left (struct tree_balance * tb, int h, int cur_free) +{ + int i; + struct virtual_node * vn = tb->tb_vn; + struct virtual_item * vi; + int d_size, ih_size; + +#ifdef CONFIG_REISERFS_CHECK + if (cur_free < 0) + reiserfs_panic (0, "vs-8050: check_left: cur_free (%d) < 0", cur_free); +#endif + + /* internal level */ + if (h > 0) { + tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; + } + + /* leaf level */ + + if (!cur_free || !vn->vn_nr_item) { + /* no free space or nothing to move */ + tb->lnum[h] = 0; + tb->lbytes = -1; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (!PATH_H_PPARENT (tb->tb_path, 0)) + reiserfs_panic (0, "vs-8055: check_left: parent does not exist or invalid"); +#endif + + vi = vn->vn_vi; + if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? 
IH_SIZE : 0))) { + /* all contents of S[0] fits into L[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE) + reiserfs_panic (0, "vs-8055: check_left: invalid mode or balance condition failed"); +#endif + + tb->lnum[0] = vn->vn_nr_item; + tb->lbytes = -1; + return; + } + + + d_size = 0, ih_size = IH_SIZE; + + /* first item may be merge with last item in left neighbor */ + if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) + d_size = -((int)IH_SIZE), ih_size = 0; + + tb->lnum[0] = 0; + for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->lnum[0] ++; + continue; + } + + /* the item cannot be shifted entirely, try to split it */ + /* check whether L[0] can hold ih and at least one byte of the item body */ + if (cur_free <= ih_size) { + /* cannot shift even a part of the current item */ + tb->lbytes = -1; + return; + } + cur_free -= ih_size; + + tb->lbytes = op_check_left (vi, cur_free, 0, 0); + if (tb->lbytes != -1) + /* count partially shifted item */ + tb->lnum[0] ++; + + break; + } + + return; +} + + +/* using virtual node check, how many items can be shifted to right + neighbor */ +static void check_right (struct tree_balance * tb, int h, int cur_free) +{ + int i; + struct virtual_node * vn = tb->tb_vn; + struct virtual_item * vi; + int d_size, ih_size; + +#ifdef CONFIG_REISERFS_CHECK + if (cur_free < 0) + reiserfs_panic (tb->tb_sb, "vs-8070: check_right: cur_free < 0"); +#endif + + /* internal level */ + if (h > 0) { + tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; + } + + /* leaf level */ + + if (!cur_free || !vn->vn_nr_item) { + /* no free space */ + tb->rnum[h] = 0; + tb->rbytes = -1; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (!PATH_H_PPARENT (tb->tb_path, 0)) + reiserfs_panic (tb->tb_sb, "vs-8075: check_right: parent does not exist or invalid"); +#endif + + vi 
= vn->vn_vi + vn->vn_nr_item - 1; + if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) { + /* all contents of S[0] fits into R[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE) + reiserfs_panic (tb->tb_sb, "vs-8080: check_right: invalid mode or balance condition failed"); +#endif + + tb->rnum[h] = vn->vn_nr_item; + tb->rbytes = -1; + return; + } + + d_size = 0, ih_size = IH_SIZE; + + /* last item may be merge with first item in right neighbor */ + if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) + d_size = -(int)IH_SIZE, ih_size = 0; + + tb->rnum[0] = 0; + for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->rnum[0] ++; + continue; + } + + /* check whether R[0] can hold ih and at least one byte of the item body */ + if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */ + tb->rbytes = -1; + return; + } + + /* R[0] can hold the header of the item and at least one byte of its body */ + cur_free -= ih_size; /* cur_free is still > 0 */ + + tb->rbytes = op_check_right (vi, cur_free); + if (tb->rbytes != -1) + /* count partially shifted item */ + tb->rnum[0] ++; + + break; + } + + return; +} + + +/* + * from - number of items, which are shifted to left neighbor entirely + * to - number of item, which are shifted to right neighbor entirely + * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor + * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ +static int get_num_ver (int mode, struct tree_balance * tb, int h, + int from, int from_bytes, + int to, int to_bytes, + short * snum012, int flow + ) +{ + int i; + int cur_free; + // int bytes; + int units; + struct virtual_node * vn = tb->tb_vn; + // 
struct virtual_item * vi; + + int total_node_size, max_node_size, current_item_size; + int needed_nodes; + int start_item, /* position of item we start filling node from */ + end_item, /* position of item we finish filling node by */ + start_bytes,/* number of first bytes (entries for directory) of start_item-th item + we do not include into node that is being filled */ + end_bytes; /* number of last bytes (entries for directory) of end_item-th item + we do not include into node that is being filled */ + int split_item_positions[2]; /* these are positions in virtual item of + items, that are split between S[0] and + S1new and S1new and S2new */ + + split_item_positions[0] = -1; + split_item_positions[1] = -1; + +#ifdef CONFIG_REISERFS_CHECK + /* We only create additional nodes if we are in insert or paste mode + or we are in replace mode at the internal level. If h is 0 and + the mode is M_REPLACE then in fix_nodes we change the mode to + paste or insert before we get here in the code. */ + if ( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE)) + reiserfs_panic (0, "vs-8100: get_num_ver: insert_size < 0 in overflow"); +#endif + + max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h)); + + /* snum012 [0-2] - number of items, that lay + to S[0], first new node and second new node */ + snum012[3] = -1; /* s1bytes */ + snum012[4] = -1; /* s2bytes */ + + /* internal level */ + if (h > 0) { + i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); + if (i == max_node_size) + return 1; + return (i / max_node_size + 1); + } + + /* leaf level */ + needed_nodes = 1; + total_node_size = 0; + cur_free = max_node_size; + + // start from 'from'-th item + start_item = from; + // skip its first 'start_bytes' units + start_bytes = ((from_bytes != -1) ? from_bytes : 0); + + // last included item is the 'end_item'-th one + end_item = vn->vn_nr_item - to - 1; + // do not count last 'end_bytes' units of 'end_item'-th item + end_bytes = (to_bytes != -1) ?
to_bytes : 0; + + /* go through all items beginning from the start_item-th item and ending by + the end_item-th item. Do not count first 'start_bytes' units of + 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ + + for (i = start_item; i <= end_item; i ++) { + struct virtual_item * vi = vn->vn_vi + i; + int skip_from_end = ((i == end_item) ? end_bytes : 0); + +#ifdef CONFIG_REISERFS_CHECK + if (needed_nodes > 3) { + reiserfs_panic (tb->tb_sb, "vs-8105: get_num_ver: too many nodes are needed"); + } +#endif + + /* get size of current item */ + current_item_size = vi->vi_item_len; + + /* do not take in calculation head part (from_bytes) of from-th item */ + current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes); + + /* do not take in calculation tail part of last item */ + current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end); + + /* if item fits into current node entirely */ + if (total_node_size + current_item_size <= max_node_size) { + snum012[needed_nodes - 1] ++; + total_node_size += current_item_size; + start_bytes = 0; + continue; + } + + if (current_item_size > max_node_size) { + /* virtual item length is longer, than max size of item in + a node. It is impossible for direct item */ +#ifdef CONFIG_REISERFS_CHECK + if (is_direct_le_ih (vi->vi_ih)) + reiserfs_panic (tb->tb_sb, "vs-8110: get_num_ver: " + "direct item length is %d. 
It can not be longer than %d", + current_item_size, max_node_size); +#endif + /* we will try to split it */ + flow = 1; + } + + if (!flow) { + /* as we do not split items, take new node and continue */ + needed_nodes ++; i --; total_node_size = 0; + continue; + } + + // calculate number of item units which fit into node being + // filled + { + int free_space; + + free_space = max_node_size - total_node_size - IH_SIZE; + units = op_check_left (vi, free_space, start_bytes, skip_from_end); + if (units == -1) { + /* nothing fits into current node, take new node and continue */ + needed_nodes ++, i--, total_node_size = 0; + continue; + } + } + + /* something fits into the current node */ + //if (snum012[3] != -1 || needed_nodes != 1) + // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); + //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; + start_bytes += units; + snum012[needed_nodes - 1 + 3] = units; + + if (needed_nodes > 2) + reiserfs_warning ("vs-8111: get_num_ver: split_item_position is out of boundary\n"); + snum012[needed_nodes - 1] ++; + split_item_positions[needed_nodes - 1] = i; + needed_nodes ++; + /* continue from the same item with start_bytes != -1 */ + start_item = i; + i --; + total_node_size = 0; + } + + // sum012[4] (if it is not -1) contains number of units of which + // are to be in S1new, snum012[3] - to be in S0. They are supposed + // to be S1bytes and S2bytes correspondingly, so recalculate + if (snum012[4] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S1new; + + split_item_num = split_item_positions[1]; + bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); + bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); + bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? 
snum012[3] : 0); + + // s2bytes + snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new; + + if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY) + reiserfs_warning ("vs-8115: get_num_ver: not directory item\n"); + } + + /* now we know S2bytes, calculate S1bytes */ + if (snum012[3] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S2new; + + split_item_num = split_item_positions[0]; + bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); + bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); + bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0); + + // s1bytes + snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new; + } + + return needed_nodes; +} + + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + + +/* Set parameters for balancing. + * Performs write of results of analysis of balancing into structure tb, + * where it will later be used by the functions that actually do the balancing. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * lnum number of items from S[h] that must be shifted to L[h]; + * rnum number of items from S[h] that must be shifted to R[h]; + * blk_num number of blocks that S[h] will be splitted into; + * s012 number of items that fall into splitted nodes. 
+ * lbytes number of bytes which flow to the left neighbor from the item that is + * not shifted entirely + * rbytes number of bytes which flow to the right neighbor from the item that is + * not shifted entirely + * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) + */ + +static void set_parameters (struct tree_balance * tb, int h, int lnum, + int rnum, int blk_num, short * s012, int lb, int rb) +{ + + tb->lnum[h] = lnum; + tb->rnum[h] = rnum; + tb->blknum[h] = blk_num; + + if (h == 0) + { /* only for leaf level */ + if (s012 != NULL) + { + tb->s0num = * s012 ++, + tb->s1num = * s012 ++, + tb->s2num = * s012 ++; + tb->s1bytes = * s012 ++; + tb->s2bytes = * s012; + } + tb->lbytes = lb; + tb->rbytes = rb; + } +} + + + +/* check, does node disappear if we shift tb->lnum[0] items to left + neighbor and tb->rnum[0] to the right one. */ +static int is_leaf_removable (struct tree_balance * tb) +{ + struct virtual_node * vn = tb->tb_vn; + int to_left, to_right; + int size; + int remain_items; + + /* number of items, that will be shifted to left (right) neighbor + entirely */ + to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); + to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 
1 : 0); + remain_items = vn->vn_nr_item; + + /* how many items remain in S[0] after shiftings to neighbors */ + remain_items -= (to_left + to_right); + + if (remain_items < 1) { + /* all content of node can be shifted to neighbors */ + set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1); + return 1; + } + + if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) + /* S[0] is not removable */ + return 0; + + /* check, whether we can divide 1 remaining item between neighbors */ + + /* get size of remaining item (in item units) */ + size = op_unit_num (&(vn->vn_vi[to_left])); + + if (tb->lbytes + tb->rbytes >= size) { + set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1); + return 1; + } + + return 0; +} + + +/* check whether L, S, R can be joined in one node */ +static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree) +{ + struct virtual_node * vn = tb->tb_vn; + int ih_size; + struct buffer_head *S0; + + S0 = PATH_H_PBUFFER (tb->tb_path, 0); + + ih_size = 0; + if (vn->vn_nr_item) { + if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) + ih_size += IH_SIZE; + + if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE) + ih_size += IH_SIZE; + } else { + /* there was only one item and it will be deleted */ + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (B_NR_ITEMS (S0) != 1) + reiserfs_panic (0, "vs-8125: are_leaves_removable: item number must be 1: it is %d", B_NR_ITEMS(S0)); +#endif + + ih = B_N_PITEM_HEAD (S0, 0); + if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]))) + if (is_direntry_le_ih (ih)) { +#ifndef REISERFS_FSCK + + /* Directory must be in correct state here: that is + somewhere at the left side should exist first directory + item. But the item being deleted can not be that first + one because its right neighbor is item of the same + directory. (But first item always gets deleted in last + turn). 
So, neighbors of deleted item can be merged, so + we can save ih_size */ + ih_size = IH_SIZE; + +#ifdef CONFIG_REISERFS_CHECK + /* we might check that left neighbor exists and is of the + same directory */ + if (le_key_k_offset (ih_version (ih), &(ih->ih_key)) == DOT_OFFSET) + reiserfs_panic (tb->tb_sb, "vs-8130: are_leaves_removable: " + "first directory item can not be removed until directory is not empty"); +#endif + + +#else /* REISERFS_FSCK */ + + /* we can delete any directory item in fsck (if it is unreachable) */ + if (ih->ih_key.k_offset != DOT_OFFSET) { + /* must get left neighbor here to make sure, that left + neighbor is of the same directory */ + struct buffer_head * left; + + left = get_left_neighbor (tb->tb_sb, tb->tb_path); + if (left) { + struct item_head * last; + + if (B_NR_ITEMS (left) == 0) + reiserfs_panic (tb->tb_sb, "vs-8135: are_leaves_removable: " + "empty node in the tree"); + last = B_N_PITEM_HEAD (left, B_NR_ITEMS (left) - 1); + if (!comp_short_keys (&last->ih_key, &ih->ih_key)) + ih_size = IH_SIZE; + brelse (left); + } + } +#endif + } + + } + + if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) { + set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1); + return 1; + } + return 0; + +} + + + +/* when we do not split item, lnum and rnum are numbers of entire items */ +#define SET_PAR_SHIFT_LEFT \ +if (h)\ +{\ + int to_l;\ + \ + to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ + (MAX_NR_KEY(Sh) + 1 - lpar);\ + \ + set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ +}\ +else \ +{\ + if (lset==LEFT_SHIFT_FLOW)\ + set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ + tb->lbytes, -1);\ + else\ + set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ + -1, -1);\ +} + + +#define SET_PAR_SHIFT_RIGHT \ +if (h)\ +{\ + int to_r;\ + \ + to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ + \ + set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ +}\ +else \ +{\ + 
if (rset==RIGHT_SHIFT_FLOW)\ + set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ + -1, tb->rbytes);\ + else\ + set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ + -1, -1);\ +} + + +void free_buffers_in_tb ( + struct tree_balance * p_s_tb + ) { + int n_counter; + + decrement_counters_in_path(p_s_tb->tb_path); + + for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) { + decrement_bcount(p_s_tb->L[n_counter]); + p_s_tb->L[n_counter] = NULL; + decrement_bcount(p_s_tb->R[n_counter]); + p_s_tb->R[n_counter] = NULL; + decrement_bcount(p_s_tb->FL[n_counter]); + p_s_tb->FL[n_counter] = NULL; + decrement_bcount(p_s_tb->FR[n_counter]); + p_s_tb->FR[n_counter] = NULL; + decrement_bcount(p_s_tb->CFL[n_counter]); + p_s_tb->CFL[n_counter] = NULL; + decrement_bcount(p_s_tb->CFR[n_counter]); + p_s_tb->CFR[n_counter] = NULL; + } +} + + +/* Get new buffers for storing new nodes that are created while balancing. + * Returns: SCHEDULE_OCCURED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + * NO_DISK_SPACE - no disk space. + */ +/* The function is NOT SCHEDULE-SAFE! 
*/ +static int get_empty_nodes( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_new_bh, + * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h); + unsigned long * p_n_blocknr, + a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, }; + int n_counter, + n_number_of_freeblk, + n_amount_needed,/* number of needed empty blocks */ + n_retval = CARRY_ON; + struct super_block * p_s_sb = p_s_tb->tb_sb; + + +#ifdef REISERFS_FSCK + if (n_h == 0 && p_s_tb->insert_size[n_h] == 0x7fff) + return CARRY_ON; +#endif + + /* number_of_freeblk is the number of empty blocks which have been + acquired for use by the balancing algorithm minus the number of + empty blocks used in the previous levels of the analysis, + number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs + after empty blocks are acquired, and the balancing analysis is + then restarted, amount_needed is the number needed by this level + (n_h) of the balancing analysis. + + Note that for systems with many processes writing, it would be + more layout optimal to calculate the total number needed by all + levels and then to run reiserfs_new_blocks to get all of them at once. */ + + /* Initiate number_of_freeblk to the amount acquired prior to the restart of + the analysis or 0 if not restarted, then subtract the amount needed + by all of the levels of the tree below n_h. */ + /* blknum includes S[n_h], so we subtract 1 in this calculation */ + for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ ) + n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0; + + /* Allocate missing empty blocks. */ + /* if p_s_Sh == 0 then we are getting a new root */ + n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. 
*/ + if ( n_amount_needed > n_number_of_freeblk ) + n_amount_needed -= n_number_of_freeblk; + else /* If we have enough already then there is nothing to do. */ + return CARRY_ON; + + if ( reiserfs_new_blocknrs (p_s_tb->transaction_handle, a_n_blocknrs, + PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_blocknr, n_amount_needed) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + /* for each blocknumber we just got, get a buffer and stick it on FEB */ + for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed; + p_n_blocknr++, n_counter++ ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! *p_n_blocknr ) + reiserfs_panic(p_s_sb, "PAP-8135: get_empty_nodes: reiserfs_new_blocknrs failed when got new blocks"); +#endif + + p_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, *p_n_blocknr, p_s_sb->s_blocksize); + if (atomic_read (&(p_s_new_bh->b_count)) > 1) { +/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/ +/* + reiserfs_warning ("waiting for buffer %b, iput inode pid = %d, this pid %d, mode %c, %h\n", + p_s_new_bh, put_inode_pid, current->pid, p_s_tb->tb_vn->vn_mode, p_s_tb->tb_vn->vn_ins_ih); + print_tb (0, 0, 0, p_s_tb, "tb"); +*/ +/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/ + if (atomic_read(&(p_s_new_bh->b_count)) > 2 || + !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) { + n_retval = REPEAT_SEARCH ; + free_buffers_in_tb (p_s_tb); + wait_buffer_until_released (p_s_new_bh); + } + } +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&(p_s_new_bh->b_count)) != 1 || buffer_dirty (p_s_new_bh)) { + if (atomic_read(&(p_s_new_bh->b_count)) > 2 || + !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) { + reiserfs_panic(p_s_sb,"PAP-8140: get_empty_nodes: not free or dirty buffer %b for the new block", + p_s_new_bh); + } + } +#endif + + /* Put empty buffers into the array. 
*/ + if (p_s_tb->FEB[p_s_tb->cur_blknum]) + BUG(); + + p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; + } + + if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) ) + n_retval = REPEAT_SEARCH ; + + return n_retval; +} + + +/* Get free space of the left neighbor, which is stored in the parent + * node of the left neighbor. */ +static int get_lfree (struct tree_balance * tb, int h) +{ + struct buffer_head * l, * f; + int order; + + if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) + return 0; + + if (f == l) + order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1; + else { + order = B_NR_ITEMS (l); + f = l; + } + + return (MAX_CHILD_SIZE(f) - le16_to_cpu (B_N_CHILD(f,order)->dc_size)); +} + + +/* Get free space of the right neighbor, + * which is stored in the parent node of the right neighbor. + */ +static int get_rfree (struct tree_balance * tb, int h) +{ + struct buffer_head * r, * f; + int order; + + if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) + return 0; + + if (f == r) + order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1; + else { + order = 0; + f = r; + } + + return (MAX_CHILD_SIZE(f) - B_N_CHILD(f,order)->dc_size); + +} + + +/* Check whether left neighbor is in memory. */ +static int is_left_neighbor_in_cache( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_father, * left; + struct super_block * p_s_sb = p_s_tb->tb_sb; + unsigned long n_left_neighbor_blocknr; + int n_left_neighbor_position; + + if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */ + return 0; + + /* Calculate father of the node to be balanced. */ + p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if ( ! p_s_father || ! B_IS_IN_TREE (p_s_father) || ! B_IS_IN_TREE (p_s_tb->FL[n_h]) || + ! buffer_uptodate (p_s_father) || ! 
buffer_uptodate (p_s_tb->FL[n_h]) ) { + reiserfs_panic (p_s_sb, "vs-8165: is_left_neighbor_in_cache: F[h] (%b) or FL[h] (%b) is invalid", + p_s_father, p_s_tb->FL[n_h]); + } +#endif + + + /* Get position of the pointer to the left neighbor into the left father. */ + n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ? + p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); + /* Get left neighbor block number. */ + n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); + /* Look for the left neighbor in the cache. */ + if ( (left = get_hash_table(p_s_sb->s_dev, n_left_neighbor_blocknr, p_s_sb->s_blocksize)) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_uptodate (left) && ! B_IS_IN_TREE(left) ) { + reiserfs_panic(p_s_sb, "vs-8170: is_left_neighbor_in_cache: left neighbor (%b %z) is not in the tree", + left, left); + } +#endif + atomic_dec (&(left->b_count)); + return 1; + } + + return 0; +} + + +#define LEFT_PARENTS 'l' +#define RIGHT_PARENTS 'r' + + +static void decrement_key (struct cpu_key * p_s_key) +{ + // call item specific function for this key + item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key); + + +#if 0 /* this works wrong when key is key of second part of tail: it + sets key to be of indirect type. It looks like it makes no + harm but it is unclear */ + + unsigned long * p_n_key_field = (unsigned long *)p_s_key + REISERFS_FULL_KEY_LEN - 1; + int n_counter; + + for( n_counter = 0; n_counter < REISERFS_FULL_KEY_LEN; n_counter++, p_n_key_field-- ) { + if ( *p_n_key_field ) { + (*p_n_key_field)--; + break; + } + } +#ifdef CONFIG_REISERFS_CHECK + if ( n_counter == REISERFS_FULL_KEY_LEN ) + reiserfs_panic(NULL, "PAP-8175: decrement_key: zero key"); +#endif + +#endif /*0*/ + +} + + + + +/* Calculate far left/right parent of the left/right neighbor of the current node, that + * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 
+ * Calculate left/right common parent of the current node and L[h]/R[h]. + * Calculate left/right delimiting key position. + * Returns: PATH_INCORRECT - path in the tree is not correct; + SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_far_parent (struct tree_balance * p_s_tb, + int n_h, + struct buffer_head ** pp_s_father, + struct buffer_head ** pp_s_com_father, + char c_lr_par) +{ + struct buffer_head * p_s_parent; + INITIALIZE_PATH (s_path_to_neighbor_father); + struct path * p_s_path = p_s_tb->tb_path; + struct cpu_key s_lr_father_key; + int n_counter, + n_position = MAX_INT, + n_first_last_position = 0, + n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); + + /* Starting from F[n_h] go upwards in the tree, and look for the common + ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ + + n_counter = n_path_offset; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_counter < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8180: get_far_parent: invalid path length"); +#endif + + + for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) { + /* Check whether parent of the current buffer in the path is really parent in the tree. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) ) + return REPEAT_SEARCH; + /* Check whether position in the parent is correct. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) ) + return REPEAT_SEARCH; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr ) + return REPEAT_SEARCH; + /* Return delimiting key if position in the parent is not equal to first/last one. 
*/ + if ( c_lr_par == RIGHT_PARENTS ) + n_first_last_position = B_NR_ITEMS (p_s_parent); + if ( n_position != n_first_last_position ) { + *pp_s_com_father = p_s_parent; + atomic_inc (&((*pp_s_com_father)->b_count)); + /*(*pp_s_com_father = p_s_parent)->b_count++;*/ + break; + } + } + + /* if we are in the root of the tree, then there is no common father */ + if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) { + /* Check whether first buffer in the path is the root of the tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { + *pp_s_father = *pp_s_com_father = NULL; + return CARRY_ON; + } + return REPEAT_SEARCH; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL ) { + reiserfs_panic(p_s_tb->tb_sb, "PAP-8185: get_far_parent: (%b %z) level too small", *pp_s_com_father, *pp_s_com_father); + } +#endif + + /* Check whether the common parent is locked. */ + + if ( buffer_locked (*pp_s_com_father) ) { + __wait_on_buffer(*pp_s_com_father); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } + } + + /* So, we got common parent of the current node and its left/right neighbor. + Now we are geting the parent of the left/right neighbor. */ + + /* Form key to get parent of the left/right neighbor. */ + le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ? 
+ (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position))); + + + if ( c_lr_par == LEFT_PARENTS ) + decrement_key(&s_lr_father_key); + + if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR) + // path is released + return IO_ERROR; + + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_counters_in_path(&s_path_to_neighbor_father); + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } + + *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); + +#ifdef CONFIG_REISERFS_CHECK + if ( B_LEVEL (*pp_s_father) != n_h + 1 ) { + reiserfs_panic(p_s_tb->tb_sb, "PAP-8190: get_far_parent: (%b %z) level too small", *pp_s_father, *pp_s_father); + } + + if ( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(0, "PAP-8192: get_far_parent: path length is too small"); + +#endif + + s_path_to_neighbor_father.path_length--; + decrement_counters_in_path(&s_path_to_neighbor_father); + return CARRY_ON; +} + + +/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of + * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], + * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. + * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset]. + * Returns: SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_parents (struct tree_balance * p_s_tb, int n_h) +{ + struct path * p_s_path = p_s_tb->tb_path; + int n_position, + n_ret_value, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + struct buffer_head * p_s_curf, + * p_s_curcf; + + /* Current node is the root of the tree or will be root of the tree */ + if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { + /* The root can not have parents. 
+ Release nodes which previously were obtained as parents of the current node neighbors. */ + decrement_bcount(p_s_tb->FL[n_h]); + decrement_bcount(p_s_tb->CFL[n_h]); + decrement_bcount(p_s_tb->FR[n_h]); + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL; + return CARRY_ON; + } + + /* Get parent FL[n_path_offset] of L[n_path_offset]. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) { + /* Current node is not the first child of its parent. */ + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ + p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + atomic_inc (&(p_s_curf->b_count)); + atomic_inc (&(p_s_curf->b_count)); + p_s_tb->lkey[n_h] = n_position - 1; + } + else { + /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. + Calculate current common parent of L[n_path_offset] and the current node. Note that + CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. + Calculate lkey[n_path_offset]. */ + if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, + &p_s_curcf, LEFT_PARENTS)) != CARRY_ON ) + return n_ret_value; + } + + decrement_bcount(p_s_tb->FL[n_h]); + p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ + decrement_bcount(p_s_tb->CFL[n_h]); + p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ + +#ifdef CONFIG_REISERFS_CHECK + if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8195: get_parents: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); + } +#endif + +/* Get parent FR[n_h] of R[n_h]. */ + +/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. 
*/ + if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) { +/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. + Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] + not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ + if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON ) + return n_ret_value; + } + else { +/* Current node is not the last child of its parent F[n_h]. */ + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ + p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + atomic_inc (&(p_s_curf->b_count)); + atomic_inc (&(p_s_curf->b_count)); + p_s_tb->rkey[n_h] = n_position; + } + + decrement_bcount(p_s_tb->FR[n_h]); + p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ + + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ + +#ifdef CONFIG_REISERFS_CHECK +#if 0 + if (n_h == 0 && p_s_tb->CFR[n_h] && COMP_KEYS (B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)), + B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h]))) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8200: get_parents: rdkey in S0 %k, rdkey in CFR0 %k do not match", + B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)), B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h])); + } +#endif + if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8205: get_parents: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); + } +#endif + + return CARRY_ON; +} + + +/* it is possible to remove node as result of shiftings to + neighbors even when we insert or paste item. 
*/ +static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h) +{ + struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h); + int levbytes = tb->insert_size[h]; + struct item_head * ih; + struct key * r_key = NULL; + + ih = B_N_PITEM_HEAD (Sh, 0); + if ( tb->CFR[h] ) + r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]); + + if ( + lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes + /* shifting may merge items which might save space */ +#ifdef REISERFS_FSCK + - (( ! h && is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 ) ? IH_SIZE : 0) + - (( ! h && r_ih && is_right_mergeable (tb->tb_sb, tb->tb_path) == 1 ) ? IH_SIZE : 0) +#else + - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0) + - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0) +#endif + + (( h ) ? KEY_SIZE : 0)) + { + /* node can not be removed */ + if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */ + if ( ! h ) + tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0); + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + } + return !NO_BALANCING_NEEDED; +} + + + +/* Check whether current node S[h] is balanced when increasing its size by + * Inserting or Pasting. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + */ +/* ip means Inserting or Pasting */ +static int ip_check_balance (struct tree_balance * tb, int h) +{ + struct virtual_node * vn = tb->tb_vn; + int levbytes, /* Number of bytes that must be inserted into (value + is negative if bytes are deleted) buffer which + contains node being balanced. 
The mnemonic is + that the attempted change in node space used level + is levbytes bytes. */ + n_ret_value; + + int lfree, sfree, rfree /* free space in L, S and R */; + + /* nver is short for number of vertixes, and lnver is the number if + we shift to the left, rnver is the number if we shift to the + right, and lrnver is the number if we shift in both directions. + The goal is to minimize first the number of vertixes, and second, + the number of vertixes whose contents are changed by shifting, + and third the number of uncached vertixes whose contents are + changed by shifting and must be read from disk. */ + int nver, lnver, rnver, lrnver; + + /* used at leaf level only, S0 = S[0] is the node being balanced, + sInum [ I = 0,1,2 ] is the number of items that will + remain in node SI after balancing. S1 and S2 are new + nodes that might be created. */ + + /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. + where 4th parameter is s1bytes and 5th - s2bytes + */ + short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases + 0,1 - do not shift and do not shift but bottle + 2 - shift only whole item to left + 3 - shift to left and bottle as much as possible + 4,5 - shift to right (whole items and as much as possible + 6,7 - shift to both directions (whole items and as much as possible) + */ + + /* Sh is the node whose balance is currently being checked */ + struct buffer_head * Sh; + +#ifdef REISERFS_FSCK + /* special mode for insert pointer to the most low internal node */ + if (h == 0 && vn->vn_mode == M_INTERNAL) { + /* blk_num == 2 is to get pointer inserted to the next level */ + set_parameters (tb, h, 0, 0, 2, NULL, -1, -1); + return 0; + } +#endif + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + levbytes = tb->insert_size[h]; + + /* Calculate balance parameters for creating new root. */ + if ( ! Sh ) { + if ( ! 
h ) + reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0"); + switch ( n_ret_value = get_empty_nodes (tb, h) ) { + case CARRY_ON: + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + + case NO_DISK_SPACE: + case REPEAT_SEARCH: + return n_ret_value; + default: + reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); + } + } + + if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */ + return n_ret_value; + + sfree = B_FREE_SPACE (Sh); + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED) + /* and new item fits into node S[h] without any shifting */ + return NO_BALANCING_NEEDED; + + create_virtual_node (tb, h); + + /* + determine maximal number of items we can shift to the left neighbor (in tb structure) + and the maximal number of bytes that can flow to the left neighbor + from the left most liquid item that cannot be shifted from S[0] entirely (returned value) + */ + check_left (tb, h, lfree); + + /* + determine maximal number of items we can shift to the right neighbor (in tb structure) + and the maximal number of bytes that can flow to the right neighbor + from the right most liquid item that cannot be shifted from S[0] entirely (returned value) + */ + check_right (tb, h, rfree); + + + /* all contents of internal node S[h] can be moved into its + neighbors, S[h] will be removed after balancing */ + if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { + int to_r; + + /* Since we are working on internal nodes, and our internal + nodes have fixed size entries, then we can balance by the + number of items rather than the space they consume. 
In this + routine we set the left node equal to the right node, + allowing a difference of less than or equal to 1 child + pointer. */ + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + +#ifdef CONFIG_REISERFS_CHECK + /* this checks balance condition, that any two neighboring nodes can not fit in one node */ + if ( h && ( tb->lnum[h] >= vn->vn_nr_item + 1 || tb->rnum[h] >= vn->vn_nr_item + 1) ) + reiserfs_panic (tb->tb_sb, "vs-8220: ip_check_balance: tree is not balanced on internal level"); + + if ( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || + (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) )) + reiserfs_panic(tb->tb_sb, "vs-8225: ip_check_balance: tree is not balanced on leaf level"); +#endif + + /* all contents of S[0] can be moved into its neighbors + S[0] will be removed after balancing. */ + if (!h && is_leaf_removable (tb)) + return CARRY_ON; + + + /* why do we perform this check here rather than earlier?? + Answer: we can win 1 node in some cases above. Moreover we + checked it above, when we checked, that S[0] is not removable + in principle */ + if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ + if ( ! 
h ) + tb->s0num = vn->vn_nr_item; + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + + { + int lpar, rpar, nset, lset, rset, lrset; + /* + * regular overflowing of the node + */ + + /* get_num_ver works in 2 modes (FLOW & NO_FLOW) + lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) + nset, lset, rset, lrset - shows, whether flowing items give better packing + */ +#define FLOW 1 +#define NO_FLOW 0 /* do not any splitting */ + + /* we choose one the following */ +#define NOTHING_SHIFT_NO_FLOW 0 +#define NOTHING_SHIFT_FLOW 5 +#define LEFT_SHIFT_NO_FLOW 10 +#define LEFT_SHIFT_FLOW 15 +#define RIGHT_SHIFT_NO_FLOW 20 +#define RIGHT_SHIFT_FLOW 25 +#define LR_SHIFT_NO_FLOW 30 +#define LR_SHIFT_FLOW 35 + + + lpar = tb->lnum[h]; + rpar = tb->rnum[h]; + + + /* calculate number of blocks S[h] must be split into when + nothing is shifted to the neighbors, + as well as number of items in each part of the split node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ + nset = NOTHING_SHIFT_NO_FLOW; + nver = get_num_ver (vn->vn_mode, tb, h, + 0, -1, h?vn->vn_nr_item:0, -1, + snum012, NO_FLOW); + + if (!h) + { + int nver1; + + /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ + nver1 = get_num_ver (vn->vn_mode, tb, h, + 0, -1, 0, -1, + snum012 + NOTHING_SHIFT_FLOW, FLOW); + if (nver > nver1) + nset = NOTHING_SHIFT_FLOW, nver = nver1; + } + + + /* calculate number of blocks S[h] must be split into when + l_shift_num first items and l_shift_bytes of the right most + liquid item to be shifted are shifted to the left neighbor, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + lset = LEFT_SHIFT_NO_FLOW; + lnver = get_num_ver (vn->vn_mode, tb, h, + lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? 
vn->vn_nr_item:0, -1, + snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); + if (!h) + { + int lnver1; + + lnver1 = get_num_ver (vn->vn_mode, tb, h, + lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, 0, -1, + snum012 + LEFT_SHIFT_FLOW, FLOW); + if (lnver > lnver1) + lset = LEFT_SHIFT_FLOW, lnver = lnver1; + } + + + /* calculate number of blocks S[h] must be split into when + r_shift_num first items and r_shift_bytes of the left most + liquid item to be shifted are shifted to the right neighbor, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + rset = RIGHT_SHIFT_NO_FLOW; + rnver = get_num_ver (vn->vn_mode, tb, h, + 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1, + snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); + if (!h) + { + int rnver1; + + rnver1 = get_num_ver (vn->vn_mode, tb, h, + 0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes, + snum012 + RIGHT_SHIFT_FLOW, FLOW); + + if (rnver > rnver1) + rset = RIGHT_SHIFT_FLOW, rnver = rnver1; + } + + + /* calculate number of blocks S[h] must be split into when + items are shifted in both directions, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + lrset = LR_SHIFT_NO_FLOW; + lrnver = get_num_ver (vn->vn_mode, tb, h, + lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1, + snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); + if (!h) + { + int lrnver1; + + lrnver1 = get_num_ver (vn->vn_mode, tb, h, + lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 
1 : 0)), tb->rbytes, + snum012 + LR_SHIFT_FLOW, FLOW); + if (lrnver > lrnver1) + lrset = LR_SHIFT_FLOW, lrnver = lrnver1; + } + + + + /* Our general shifting strategy is: + 1) to minimized number of new nodes; + 2) to minimized number of neighbors involved in shifting; + 3) to minimized number of disk reads; */ + + /* we can win TWO or ONE nodes by shifting in both directions */ + if (lrnver < lnver && lrnver < rnver) + { +#ifdef CONFIG_REISERFS_CHECK + if (h && (tb->lnum[h] != 1 || tb->rnum[h] != 1 || lrnver != 1 || rnver != 2 || lnver != 2 || h != 1)) + reiserfs_panic (0, "vs-8230: check_balance: bad h"); +#endif + if (lrset == LR_SHIFT_FLOW) + set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset, + tb->lbytes, tb->rbytes); + else + set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1), + tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1); + + return CARRY_ON; + } + + /* if shifting doesn't lead to better packing then don't shift */ + if (nver == lrnver) + { + set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1); + return CARRY_ON; + } + + + /* now we know that for better packing shifting in only one + direction either to the left or to the right is required */ + + /* if shifting to the left is better than shifting to the right */ + if (lnver < rnver) + { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } + + /* if shifting to the right is better than shifting to the left */ + if (lnver > rnver) + { + SET_PAR_SHIFT_RIGHT; + return CARRY_ON; + } + + + /* now shifting in either direction gives the same number + of nodes and we can make use of the cached neighbors */ + if (is_left_neighbor_in_cache (tb,h)) + { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } + + /* shift to the right independently on whether the right neighbor in cache or not */ + SET_PAR_SHIFT_RIGHT; + return CARRY_ON; + } +} + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Cutting for INTERNAL node 
of S+tree. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + * + * Note: Items of internal nodes have fixed size, so the balance condition for + * the internal part of S+tree is as for the B-trees. + */ +static int dc_check_balance_internal (struct tree_balance * tb, int h) +{ + struct virtual_node * vn = tb->tb_vn; + + /* Sh is the node whose balance is currently being checked, + and Fh is its father. */ + struct buffer_head * Sh, * Fh; + int maxsize, + n_ret_value; + int lfree, rfree /* free space in L and R */; + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + Fh = PATH_H_PPARENT (tb->tb_path, h); + + maxsize = MAX_CHILD_SIZE(Sh); + +/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ +/* new_nr_item = number of items node would have if operation is */ +/* performed without balancing (new_nr_item); */ + create_virtual_node (tb, h); + + if ( ! Fh ) + { /* S[h] is the root. */ + if ( vn->vn_nr_item > 0 ) + { + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + } + /* new_nr_item == 0. + * Current root will be deleted resulting in + * decrementing the tree height. */ + set_parameters (tb, h, 0, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) + return n_ret_value; + + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + /* determine maximal number of items we can fit into neighbors */ + check_left (tb, h, lfree); + check_right (tb, h, rfree); + + + if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) ) + { /* Balance condition for the internal node is valid. 
+ * In this case we balance only if it leads to better packing. */ + if ( vn->vn_nr_item == MIN_NR_KEY(Sh) ) + { /* Here we join S[h] with one of its neighbors, + * which is impossible with greater values of new_nr_item. */ + if ( tb->lnum[h] >= vn->vn_nr_item + 1 ) + { + /* All contents of S[h] can be moved to L[h]. */ + int n; + int order_L; + + order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; + n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + if ( tb->rnum[h] >= vn->vn_nr_item + 1 ) + { + /* All contents of S[h] can be moved to R[h]. */ + int n; + int order_R; + + order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1; + n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); + return CARRY_ON; + } + } + + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) + { + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ + int to_r; + + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Balancing does not lead to better packing. */ + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + /* Current node contain insufficient number of items. Balancing is required. */ + /* Check whether we can merge S[h] with left neighbor. */ + if (tb->lnum[h] >= vn->vn_nr_item + 1) + if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) + { + int n; + int order_L; + + order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? 
B_NR_ITEMS(tb->FL[h]) : n - 1; + n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Check whether we can merge S[h] with right neighbor. */ + if (tb->rnum[h] >= vn->vn_nr_item + 1) + { + int n; + int order_R; + + order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1); + n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) + { + int to_r; + + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* For internal nodes try to borrow item from a neighbor */ +#ifdef CONFIG_REISERFS_CHECK + if (!tb->FL[h] && !tb->FR[h]) + reiserfs_panic (0, "vs-8235: dc_check_balance_internal: trying to borrow for root"); +#endif + + /* Borrow one or two items from caching neighbor */ + if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h]) + { + int from_l; + + from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1); + set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1); + return CARRY_ON; + } + + set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1, + NULL, -1, -1); + return CARRY_ON; +} + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Truncating for LEAF node of S+tree. + * Calculate parameters for balancing for current level h. 
+ * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + */ +static int dc_check_balance_leaf (struct tree_balance * tb, int h) +{ + struct virtual_node * vn = tb->tb_vn; + + /* Number of bytes that must be deleted from + (value is negative if bytes are deleted) buffer which + contains node being balanced. The mnemonic is that the + attempted change in node space used level is levbytes bytes. */ + int levbytes; + /* the maximal item size */ + int maxsize, + n_ret_value; + /* S0 is the node whose balance is currently being checked, + and F0 is its father. */ + struct buffer_head * S0, * F0; + int lfree, rfree /* free space in L and R */; + + S0 = PATH_H_PBUFFER (tb->tb_path, 0); + F0 = PATH_H_PPARENT (tb->tb_path, 0); + + levbytes = tb->insert_size[h]; + + maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ + + if ( ! F0 ) + { /* S[0] is the root now. 
*/ + +#ifdef CONFIG_REISERFS_CHECK + if ( -levbytes >= maxsize - B_FREE_SPACE (S0) ) + reiserfs_panic (tb->tb_sb, "vs-8240: dc_check_balance_leaf: attempt to create empty buffer tree"); +#endif + + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) + return n_ret_value; + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + create_virtual_node (tb, h); + + /* if 3 leaves can be merge to one, set parameters and return */ + if (are_leaves_removable (tb, lfree, rfree)) + return CARRY_ON; + + /* determine maximal number of items we can shift to the left/right neighbor + and the maximal number of bytes that can flow to the left/right neighbor + from the left/right most liquid item that cannot be shifted from S[0] entirely + */ + check_left (tb, h, lfree); + check_right (tb, h, rfree); + + /* check whether we can merge S with left neighbor. */ + if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) + if (is_left_neighbor_in_cache (tb,h) || + ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ + !tb->FR[h]) { + +#ifdef CONFIG_REISERFS_CHECK + if (!tb->FL[h]) + reiserfs_panic (0, "vs-8245: dc_check_balance_leaf: FL[h] must exist"); +#endif + + /* set parameter to merge S[0] with its left neighbor */ + set_parameters (tb, h, -1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* check whether we can merge S[0] with right neighbor. */ + if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { + set_parameters (tb, h, 0, -1, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ + if (is_leaf_removable (tb)) + return CARRY_ON; + + /* Balancing is not required. 
*/ + tb->s0num = vn->vn_nr_item; + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; +} + + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Cutting. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode d - delete, c - cut. + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + */ +static int dc_check_balance (struct tree_balance * tb, int h) +{ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! (PATH_H_PBUFFER (tb->tb_path, h)) ) + reiserfs_panic(tb->tb_sb, "vs-8250: dc_check_balance: S is not initialized"); +#endif + + if ( h ) + return dc_check_balance_internal (tb, h); + else + return dc_check_balance_leaf (tb, h); +} + + + +/* Check whether current node S[h] is balanced. + * Calculate parameters for balancing for current level h. + * Parameters: + * + * tb tree_balance structure: + * + * tb is a large structure that must be read about in the header file + * at the same time as this procedure if the reader is to successfully + * understand this procedure + * + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste, d - delete, c - cut. + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. 
+ */ +static int check_balance (int mode, + struct tree_balance * tb, + int h, + int inum, + int pos_in_item, + struct item_head * ins_ih, + const void * data + ) +{ + struct virtual_node * vn; + + vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); + vn->vn_free_ptr = (char *)(tb->tb_vn + 1); + vn->vn_mode = mode; + vn->vn_affected_item_num = inum; + vn->vn_pos_in_item = pos_in_item; + vn->vn_ins_ih = ins_ih; + vn->vn_data = data; + +#ifdef CONFIG_REISERFS_CHECK + if (mode == M_INSERT && !vn->vn_ins_ih) + reiserfs_panic (0, "vs-8255: check_balance: ins_ih can not be 0 in insert mode"); +#endif + + if ( tb->insert_size[h] > 0 ) + /* Calculate balance parameters when size of node is increasing. */ + return ip_check_balance (tb, h); + + /* Calculate balance parameters when size of node is decreasing. */ + return dc_check_balance (tb, h); +} + + + +/* Check whether parent at the path is the really parent of the current node.*/ +static int get_direct_parent( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_bh; + struct path * p_s_path = p_s_tb->tb_path; + int n_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + + /* We are in the root or in the new root. */ + if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1 ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8260: get_direct_parent: illegal offset in the path"); +#endif + + if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { + /* Root is not changed. */ + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; + PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; + return CARRY_ON; + } + return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ + } + + if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) ) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. 
*/ + + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) ) + return REPEAT_SEARCH; + + if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr ) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + + if ( buffer_locked(p_s_bh) ) { + __wait_on_buffer(p_s_bh); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) + return REPEAT_SEARCH; + } + + return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ +} + + +/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors + * of S[n_h] we + * need in order to balance S[n_h], and get them if necessary. + * Returns: SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_neighbors( + struct tree_balance * p_s_tb, + int n_h + ) { + int n_child_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); + unsigned long n_son_number; + struct super_block * p_s_sb = p_s_tb->tb_sb; + struct buffer_head * p_s_bh; + + + if ( p_s_tb->lnum[n_h] ) { + /* We need left neighbor to balance S[n_h]. */ + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_bh == p_s_tb->FL[n_h] && ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8270: get_neighbors: invalid position in the parent"); +#endif + + n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); + n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); + p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize); + if (!p_s_bh) + return IO_ERROR; + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(p_s_bh); + return REPEAT_SEARCH; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
B_IS_IN_TREE(p_s_tb->FL[n_h]) || n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != p_s_bh->b_blocknr ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8275: get_neighbors: invalid parent"); + if ( ! B_IS_IN_TREE(p_s_bh) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8280: get_neighbors: invalid child"); + + if (! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FL[0],n_child_position)->dc_size) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8290: get_neighbors: invalid child size of left neighbor"); +#endif + + decrement_bcount(p_s_tb->L[n_h]); + p_s_tb->L[n_h] = p_s_bh; + } + + + if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */ + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_bh == p_s_tb->FR[n_h] && PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8295: get_neighbors: invalid position in the parent"); +#endif + + n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0; + n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); + p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize); + if (!p_s_bh) + return IO_ERROR; + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(p_s_bh); + return REPEAT_SEARCH; + } + decrement_bcount(p_s_tb->R[n_h]); + p_s_tb->R[n_h] = p_s_bh; + +#ifdef CONFIG_REISERFS_CHECK + if (! 
n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8300: get_neighbors: invalid child size of right neighbor (%d != %d - %d)", + B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh), B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size); + } +#endif + + } + return CARRY_ON; +} + + +void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s) +{ + void * vp; + static size_t malloced; + + + vp = kmalloc (size, flags); + if (vp) { + s->u.reiserfs_sb.s_kmallocs += size; + if (s->u.reiserfs_sb.s_kmallocs > malloced + 200000) { + reiserfs_warning ("vs-8301: reiserfs_kmalloc: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs); + malloced = s->u.reiserfs_sb.s_kmallocs; + } + } +/*printk ("malloc : size %d, allocated %d\n", size, s->u.reiserfs_sb.s_kmallocs);*/ + return vp; +} + +void reiserfs_kfree (const void * vp, size_t size, struct super_block * s) +{ + kfree (vp); + + s->u.reiserfs_sb.s_kmallocs -= size; + if (s->u.reiserfs_sb.s_kmallocs < 0) + reiserfs_warning ("vs-8302: reiserfs_kfree: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs); + +} + + +static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh) +{ + // int size = sizeof (struct virtual_item); /* for new item in case of insert */ + // int i, nr_items; + // struct item_head * ih; + + // this is enough for _ALL_ currently possible cases. In 4 k block + // one may put < 170 empty items. Each virtual item eats 12 + // byte. The biggest direntry item may have < 256 entries. 
Each + // entry would eat 2 byte of virtual node space + return sb->s_blocksize; + +#if 0 + size = sizeof (struct virtual_node) + sizeof (struct virtual_item); + ih = B_N_PITEM_HEAD (bh, 0); + nr_items = B_NR_ITEMS (bh); + for (i = 0; i < nr_items; i ++, ih ++) { + /* each item occupies some space in virtual node */ + size += sizeof (struct virtual_item); + if (is_direntry_le_ih (ih)) + /* each entry and new one occupeis 2 byte in the virtual node */ + size += (le16_to_cpu (ih->u.ih_entry_count) + 1) * sizeof (__u16); + } + + /* 1 bit for each bitmap block to note whether bitmap block was + dirtied in the operation */ + /* size += (SB_BMAP_NR (sb) * 2 / 8 + 4);*/ + return size; +#endif +} + + + +/* maybe we should fail balancing we are going to perform when kmalloc + fails several times. But now it will loop until kmalloc gets + required memory */ +static int get_mem_for_virtual_node (struct tree_balance * tb) +{ + int check_fs = 0; + int size; + char * buf; + + size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path)); + + if (size > tb->vn_buf_size) { + /* we have to allocate more memory for virtual node */ + if (tb->vn_buf) { + /* free memory allocated before */ + reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + /* this is not needed if kfree is atomic */ + check_fs = 1; + } + + /* virtual node requires now more memory */ + tb->vn_buf_size = size; + + /* get memory for virtual item */ + buf = reiserfs_kmalloc(size, GFP_ATOMIC, tb->tb_sb); + if ( ! buf ) { + /* getting memory with GFP_KERNEL priority may involve + balancing now (due to indirect_to_direct conversion on + dcache shrinking). So, release path and collected + resourses here */ + free_buffers_in_tb (tb); + buf = reiserfs_kmalloc(size, GFP_BUFFER, tb->tb_sb); + if ( !buf ) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning ("vs-8345: get_mem_for_virtual_node: " + "kmalloc failed. 
reiserfs kmalloced %d bytes\n", + tb->tb_sb->u.reiserfs_sb.s_kmallocs); +#endif + tb->vn_buf_size = 0; + } + tb->vn_buf = buf; + schedule() ; + return REPEAT_SEARCH; + } + + tb->vn_buf = buf; + } + + if ( check_fs && FILESYSTEM_CHANGED_TB (tb) ) + return REPEAT_SEARCH; + + return CARRY_ON; +} + + +#ifdef CONFIG_REISERFS_CHECK +static void tb_buffer_sanity_check (struct super_block * p_s_sb, + struct buffer_head * p_s_bh, + const char *descr, int level) { + if (p_s_bh) { + if (atomic_read (&(p_s_bh->b_count)) <= 0) { + + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if ( ! buffer_uptodate (p_s_bh) ) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if ( ! B_IS_IN_TREE (p_s_bh) ) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if (p_s_bh->b_dev != p_s_sb->s_dev || + p_s_bh->b_size != p_s_sb->s_blocksize || + p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): check failed for buffer %s[%d] (%b)\n", descr, level, p_s_bh); + } + } +} +#endif + +static void clear_all_dirty_bits(struct super_block *s, + struct buffer_head *bh) { + reiserfs_prepare_for_journal(s, bh, 0) ; +} + +static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb) +{ + struct buffer_head * locked; +#ifdef CONFIG_REISERFS_CHECK + int repeat_counter = 0; +#endif + int i; + + do { + + locked = NULL; + + for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) { + if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) { + /* if I understand correctly, we can only be sure the last buffer + ** in the path is in the tree --clm + */ +#ifdef CONFIG_REISERFS_CHECK + if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == + PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { + tb_buffer_sanity_check 
(p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i), + "S", + p_s_tb->tb_path->path_length - i); + } +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ; + + if ( buffer_locked (PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ) + locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i); + } + } + + for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) { + + if (p_s_tb->lnum[i] ) { + + if ( p_s_tb->L[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i]) ; + if ( buffer_locked (p_s_tb->L[i]) ) + locked = p_s_tb->L[i]; + } + + if ( !locked && p_s_tb->FL[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i]) ; + if ( buffer_locked (p_s_tb->FL[i]) ) + locked = p_s_tb->FL[i]; + } + + if ( !locked && p_s_tb->CFL[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i]) ; + if ( buffer_locked (p_s_tb->CFL[i]) ) + locked = p_s_tb->CFL[i]; + } + + } + + if ( !locked && (p_s_tb->rnum[i]) ) { + + if ( p_s_tb->R[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]) ; + if ( buffer_locked (p_s_tb->R[i]) ) + locked = p_s_tb->R[i]; + } + + + if ( !locked && p_s_tb->FR[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i]) ; + if ( buffer_locked (p_s_tb->FR[i]) ) + locked = p_s_tb->FR[i]; + } + + if ( !locked && p_s_tb->CFR[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i]) ; + if ( 
buffer_locked (p_s_tb->CFR[i]) ) + locked = p_s_tb->CFR[i]; + } + } + } + /* as far as I can tell, this is not required. The FEB list seems + ** to be full of newly allocated nodes, which will never be locked, + ** dirty, or anything else. + ** To be safe, I'm putting in the checks and waits in. For the moment, + ** they are needed to keep the code in journal.c from complaining + ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. + ** --clm + */ + for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) { + if ( p_s_tb->FEB[i] ) { + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]) ; + if (buffer_locked(p_s_tb->FEB[i])) { + locked = p_s_tb->FEB[i] ; + } + } + } + + if (locked) { +#ifdef CONFIG_REISERFS_CHECK + repeat_counter++; + if ( (repeat_counter % 10000) == 0) { + reiserfs_warning ("wait_tb_buffers_until_released(): too many iterations waiting for buffer to unlock (%b)\n", locked); + + /* Don't loop forever. Try to recover from possible error. */ + + return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON; + } +#endif + __wait_on_buffer (locked); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + return REPEAT_SEARCH; + } + } + + } while (locked); + + return CARRY_ON; +} + + +/* Prepare for balancing, that is + * get all necessary parents, and neighbors; + * analyze what and where should be moved; + * get sufficient number of new nodes; + * Balancing will start only after all resources will be collected at a time. + * + * When ported to SMP kernels, only at the last moment after all needed nodes + * are collected in cache, will the resources be locked using the usual + * textbook ordered lock acquisition algorithms. Note that ensuring that + * this code neither write locks what it does not need to write lock nor locks out of order + * will be a pain in the butt that could have been avoided. Grumble grumble. 
-Hans + * + * fix is meant in the sense of render unchanging + * + * Latency might be improved by first gathering a list of what buffers are needed + * and then getting as many of them in parallel as possible? -Hans + * + * Parameters: + * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) + * tb tree_balance structure; + * inum item number in S[h]; + * pos_in_item - comment this if you can + * ins_ih & ins_sd are used when inserting + * Returns: 1 - schedule occurred while the function worked; + * 0 - schedule didn't occur while the function worked; + * -1 - if no_disk_space + */ + + +int fix_nodes (int n_op_mode, + struct tree_balance * p_s_tb, + struct item_head * p_s_ins_ih, // item head of item being inserted + const void * data // inserted item or data to be pasted + ) { + int n_ret_value, + n_h, + n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); + int n_pos_in_item; + + /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared + ** during wait_tb_buffers_run + */ + int wait_tb_buffers_run = 0 ; + int windex ; + struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); + + n_pos_in_item = p_s_tb->tb_path->pos_in_item; + + + p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb); + + /* if it possible in indirect_to_direct conversion */ + if (buffer_locked (p_s_tbS0)) { + __wait_on_buffer (p_s_tbS0); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) + return REPEAT_SEARCH; + } + +#ifndef __KERNEL__ + if ( atomic_read (&(p_s_tbS0->b_count)) > 1 || + (p_s_tb->L[0] && atomic_read (&(p_s_tb->L[0]->b_count)) > 1) || + (p_s_tb->R[0] && atomic_read (&(p_s_tb->R[0]->b_count)) > 1) ) { + printk ("mode=%c, insert_size=%d\n", n_op_mode, p_s_tb->insert_size[0]); + print_cur_tb ("first three parameters are invalid"); + reiserfs_panic (p_s_tb->tb_sb, "PAP-8310: fix_nodes: all buffers must be hold once in one thread processing"); + } +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( cur_tb ) { + print_cur_tb ("fix_nodes"); + 
reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance"); + } + + if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " + "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode); + } + + // FIXME: new items have to be of 8 byte multiples. Including new + // directory items those look like old ones + /* + if (p_s_tb->insert_size[0] % 8) + reiserfs_panic (p_s_tb->tb_sb, "vs-: fix_nodes: incorrect insert_size %d, " + "mode %c", + p_s_tb->insert_size[0], n_op_mode); + */ + + /* Check parameters. */ + switch (n_op_mode) { +#ifdef REISERFS_FSCK + case M_INTERNAL: + break; + case M_INSERT: + if ( n_item_num < 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8325: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", + n_item_num, B_NR_ITEMS(p_s_tbS0)); +#else + case M_INSERT: + if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", + n_item_num, B_NR_ITEMS(p_s_tbS0)); +#endif + break; + case M_PASTE: + case M_DELETE: + case M_CUT: + if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) { + print_block (p_s_tbS0, 0, -1, -1); + printk("mode = %c insert_size = %d\n", n_op_mode, p_s_tb->insert_size[0]); + reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d)", n_item_num); + } + break; + default: + reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation"); + } +#endif + + if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH) + // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat + return REPEAT_SEARCH; + + + /* Starting from the leaf level; for all levels n_h of the tree. 
*/ + for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) { + if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num, + n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) { + if ( n_ret_value == NO_BALANCING_NEEDED ) { + /* No balancing for higher levels needed. */ + if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + if ( n_h != MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + /* ok, analysis and resource gathering are complete */ + break; + } + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; /* No disk space, or schedule occurred and + analysis may be invalid and needs to be redone. */ + } + + if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) { + /* We have a positive insert size but no nodes exist on this + level, this means that we are creating a new root. */ + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_tb->blknum[n_h] != 1 ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8350: fix_nodes: creating new empty root"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( n_h < MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + } + else + if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) { + if ( p_s_tb->blknum[n_h] > 1 ) { + /* The tree needs to be grown, so this node S[n_h] + which is the root node is split into two nodes, + and a new node (S[n_h+1]) will be created to + become the root node. 
*/ + +#ifdef CONFIG_REISERFS_CHECK + if ( n_h == MAX_HEIGHT - 1 ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8355: fix_nodes: attempt to create too high of a tree"); +#endif /* CONFIG_REISERFS_CHECK */ + + p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE; + } + else + if ( n_h < MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + } + else + p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); + } + + + windex = push_journal_writer("fix_nodes") ; + if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) { + pop_journal_writer(windex) ; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + wait_tb_buffers_run = 1 ; + n_ret_value = REPEAT_SEARCH ; + goto repeat; + } else { + return CARRY_ON; + } + } else { + wait_tb_buffers_run = 1 ; + pop_journal_writer(windex) ; + goto repeat; + } + + repeat: + // fix_nodes was unable to perform its calculation due to + // filesystem got changed under us, lack of free disk space or i/o + // failure. If the first is the case - the search will be + // repeated. For now - free all resources acquired so far except + // for the new allocated nodes + { + int i; + + /* Release path buffers. 
*/ + if (wait_tb_buffers_run) { + pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ; + } else { + pathrelse (p_s_tb->tb_path); + } + /* brelse all resources collected for balancing */ + for ( i = 0; i < MAX_HEIGHT; i++ ) { + if (wait_tb_buffers_run) { + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]); + } + + brelse (p_s_tb->L[i]);p_s_tb->L[i] = 0; + brelse (p_s_tb->R[i]);p_s_tb->R[i] = 0; + brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = 0; + brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = 0; + brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = 0; + brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = 0; + } + +#if 0 // keep new allocated nodes + if (wait_tb_buffers_run) { + for ( i = 0; i < MAX_FEB_SIZE; i++ ) { + if ( p_s_tb->FEB[i] ) { + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->FEB[i]) ; + } + } + } +#endif + return n_ret_value; + } + +} + + +/* Anatoly will probably forgive me renaming p_s_tb to tb. I just + wanted to make lines shorter */ +void unfix_nodes (struct tree_balance * tb) +{ + int i; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->vn_buf ) + reiserfs_panic (tb->tb_sb, + "PAP-16050: unfix_nodes: pointer to the virtual node is NULL"); +#endif + + /* Release path buffers. 
*/ + pathrelse_and_restore (tb->tb_sb, tb->tb_path); + + /* brelse all resources collected for balancing */ + for ( i = 0; i < MAX_HEIGHT; i++ ) { + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]); + + brelse (tb->L[i]); + brelse (tb->R[i]); + brelse (tb->FL[i]); + brelse (tb->FR[i]); + brelse (tb->CFL[i]); + brelse (tb->CFR[i]); + } + + /* deal with list of allocated (used and unused) nodes */ + for ( i = 0; i < MAX_FEB_SIZE; i++ ) { + if ( tb->FEB[i] ) { + unsigned long blocknr = tb->FEB[i]->b_blocknr ; + /* de-allocated block which was not used by balancing and + bforget about buffer for it */ + brelse (tb->FEB[i]); + reiserfs_free_block (tb->transaction_handle, blocknr); + } + if (tb->used[i]) { + /* release used as new nodes including a new root */ + brelse (tb->used[i]); + } + } + +#if 0 /* shouldn't this be in CONFIG_REISERFS_CHECK??? 
*/ + /* make sure, that all we have released got really freed */ + for (i = 0; i < sizeof (tb->thrown) / sizeof (tb->thrown[0]); i ++) + if (tb->thrown[i]) { + if (atomic_read (&(tb->thrown[i]->b_count))) { + /* the log will have the count at one and the buffers marked */ + if (atomic_read(&(tb->thrown[i]->b_count)) > 1 || + !(buffer_journaled(tb->thrown[i]) || + buffer_journal_dirty(tb->thrown[i]))) { + foo_print (tb->thrown[i], tb->tb_sb); + printk ("unfix_nodes: Waiting...(block %lu, count %d)\n", + tb->thrown[i]->b_blocknr, + atomic_read (&(tb->thrown[i]->b_count))); + wait_buffer_until_released (tb->thrown[i]); + printk ("unfix_nodes: Done (block %lu, count %d)\n", + tb->thrown[i]->b_blocknr, + atomic_read (&(tb->thrown[i]->b_count))); + } + } + } +#endif /* 0 */ + reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + +} + + + +#ifndef REISERFS_FSCK + +// is_left_mergeable is now one of the item methods + +#else + +// this works only in fsck + +int are_items_mergeable (struct item_head * left, struct item_head * right, int bsize) +{ + if (comp_keys (&left->ih_key, &right->ih_key) != -1) { + reiserfs_panic (0, "vs-16070: are_items_mergeable: left %k, right %k", &(left->ih_key), &(right->ih_key)); + } + + if (comp_short_keys (&left->ih_key, &right->ih_key)) + return 0; + + if (I_IS_DIRECTORY_ITEM (left)) { + return 1; + } + + if ((I_IS_DIRECT_ITEM (left) && I_IS_DIRECT_ITEM (right)) || + (I_IS_INDIRECT_ITEM (left) && I_IS_INDIRECT_ITEM (right))) + return (left->ih_key.k_offset + I_BYTES_NUMBER (left, bsize) == right->ih_key.k_offset) ? 
1 : 0; + + return 0; +} + +/* get left neighbor of the leaf node */ +static struct buffer_head * get_left_neighbor (struct super_block * s, struct path * path) +{ + struct key key; + INITIALIZE_PATH (path_to_left_neighbor); + struct buffer_head * bh; + + copy_key (&key, B_N_PKEY (PATH_PLAST_BUFFER (path), 0)); + decrement_key (&key); + +/* init_path (&path_to_left_neighbor);*/ + search_by_key (s, &key, &path_to_left_neighbor, DISK_LEAF_NODE_LEVEL, READ_BLOCKS); + // FIXME: fsck is to handle I/O failures somehow as well + if (PATH_LAST_POSITION (&path_to_left_neighbor) == 0) { + pathrelse (&path_to_left_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_left_neighbor); + bh->b_count ++; + pathrelse (&path_to_left_neighbor); + return bh; +} + +extern struct key MIN_KEY; +static struct buffer_head * get_right_neighbor (struct super_block * s, struct path * path) +{ + struct key key; + struct key * rkey; + INITIALIZE_PATH (path_to_right_neighbor); + struct buffer_head * bh; + + rkey = get_rkey (path, s); + if (comp_keys (rkey, &MIN_KEY) == 0) + reiserfs_panic (s, "vs-16080: get_right_neighbor: get_rkey returned min key (path has changed)"); + copy_key (&key, rkey); + + + /*init_path (&path_to_right_neighbor);*/ + search_by_key (s, &key, &path_to_right_neighbor, DISK_LEAF_NODE_LEVEL, READ_BLOCKS); + if (PATH_PLAST_BUFFER (&path_to_right_neighbor) == PATH_PLAST_BUFFER (path)) { + pathrelse (&path_to_right_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_right_neighbor); + bh->b_count ++; + pathrelse (&path_to_right_neighbor); + return bh; +} + + +int is_left_mergeable (struct super_block * s, struct path * path) +{ + struct item_head * right; + struct buffer_head * bh; + int retval; + + right = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), 0); + + bh = get_left_neighbor (s, path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (B_N_PITEM_HEAD (bh, B_NR_ITEMS (bh) - 1), right, bh->b_size); + brelse (bh); + return retval; +} + + +int 
is_right_mergeable (struct super_block * s, struct path * path) +{ + struct item_head * left; + struct buffer_head * bh; + int retval; + + left = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1); + + bh = get_right_neighbor (s, path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (left, B_N_PITEM_HEAD (bh, 0), bh->b_size); + brelse (bh); + return retval; +} + +#endif /* REISERFS_FSCK */ + + + + + diff -u -r --new-file linux/fs/reiserfs/hashes.c v2.4.0-test8/linux/fs/reiserfs/hashes.c --- linux/fs/reiserfs/hashes.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/hashes.c Sat Jun 24 20:37:35 2000 @@ -0,0 +1,223 @@ +/* + * Keyed 32-bit hash function using TEA in a Davis-Meyer function + * H0 = Key + * Hi = E Mi(Hi-1) + Hi-1 + * + * (see Applied Cryptography, 2nd edition, p448). + * + * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998 + * + * Jeremy has agreed to the contents of reiserfs/README. -Hans + * Yura's function is added (04/07/2000) + */ + +// +// keyed_hash +// yura_hash +// r5_hash +// + +#include <asm/types.h> + + + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6 /* 6 gets complete mixing */ + +#ifndef __KERNEL__ +typedef __u32 u32; +#endif + +/* a, b, c, d - data; h0, h1 - accumulated hash */ +#define TEACORE(rounds) \ + do { \ + u32 sum = 0; \ + int n = rounds; \ + u32 b0, b1; \ + \ + b0 = h0; \ + b1 = h1; \ + \ + do \ + { \ + sum += DELTA; \ + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \ + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \ + } while(--n); \ + \ + h0 += b0; \ + h1 += b1; \ + } while(0) + + +u32 keyed_hash(const char *msg, int len) +{ + u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3}; + + u32 h0 = k[0], h1 = k[1]; + u32 a, b, c, d; + u32 pad; + int i; + + + // assert(len >= 0 && len < 256); + + pad = (u32)len | ((u32)len << 8); + pad |= pad << 16; + + while(len >= 16) + { + a = (u32)msg[ 0] | + 
(u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + c = (u32)msg[ 8] | + (u32)msg[ 9] << 8 | + (u32)msg[10] << 16| + (u32)msg[11] << 24; + d = (u32)msg[12] | + (u32)msg[13] << 8 | + (u32)msg[14] << 16| + (u32)msg[15] << 24; + + TEACORE(PARTROUNDS); + + len -= 16; + msg += 16; + } + + if (len >= 12) + { + //assert(len < 16); + if (len >= 16) + *(int *)0 = 0; + + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + c = (u32)msg[ 8] | + (u32)msg[ 9] << 8 | + (u32)msg[10] << 16| + (u32)msg[11] << 24; + + d = pad; + for(i = 12; i < len; i++) + { + d <<= 8; + d |= msg[i]; + } + } + else if (len >= 8) + { + //assert(len < 12); + if (len >= 12) + *(int *)0 = 0; + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + + c = d = pad; + for(i = 8; i < len; i++) + { + c <<= 8; + c |= msg[i]; + } + } + else if (len >= 4) + { + //assert(len < 8); + if (len >= 8) + *(int *)0 = 0; + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + + b = c = d = pad; + for(i = 4; i < len; i++) + { + b <<= 8; + b |= msg[i]; + } + } + else + { + //assert(len < 4); + if (len >= 4) + *(int *)0 = 0; + a = b = c = d = pad; + for(i = 0; i < len; i++) + { + a <<= 8; + a |= msg[i]; + } + } + + TEACORE(FULLROUNDS); + +/* return 0;*/ + return h0^h1; +} + + +u32 yura_hash (const char *msg, int len) +{ + int j, pow; + u32 a, c; + int i; + + for (pow=1,i=1; i < len; i++) pow = pow * 10; + + if (len == 1) + a = msg[0]-48; + else + a = (msg[0] - 48) * pow; + + for (i=1; i < len; i++) { + c = msg[i] - 48; + for (pow=1,j=i; j < len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + for (; i < 40; i++) { + c = '0' - 48; + for (pow=1,j=i; j < 
len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + for (; i < 256; i++) { + c = i; + for (pow=1,j=i; j < len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + a = a << 7; + return a; +} + +u32 r5_hash (const char *msg, int len) +{ + u32 a=0; + while(*msg) { + a += *msg << 4; + a += *msg >> 4; + a *= 11; + msg++; + } + return a; +} diff -u -r --new-file linux/fs/reiserfs/ibalance.c v2.4.0-test8/linux/fs/reiserfs/ibalance.c --- linux/fs/reiserfs/ibalance.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/ibalance.c Sun May 21 17:26:41 2000 @@ -0,0 +1,1139 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + + +/* this is one and only function that is used outside (do_balance.c) */ +int balance_internal ( + struct tree_balance * , + int, + int, + struct item_head * , + struct buffer_head ** + ); + +/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ +#define INTERNAL_SHIFT_FROM_S_TO_L 0 +#define INTERNAL_SHIFT_FROM_R_TO_S 1 +#define INTERNAL_SHIFT_FROM_L_TO_S 2 +#define INTERNAL_SHIFT_FROM_S_TO_R 3 +#define INTERNAL_INSERT_TO_S 4 +#define INTERNAL_INSERT_TO_L 5 +#define INTERNAL_INSERT_TO_R 6 + +static void internal_define_dest_src_infos ( + int shift_mode, + struct tree_balance * tb, + int h, + struct buffer_info * dest_bi, + struct buffer_info * src_bi, + int * d_key, + struct buffer_head ** cf + ) +{ +#ifdef CONFIG_REISERFS_CHECK + memset (dest_bi, 0, sizeof (struct buffer_info)); + memset (src_bi, 0, sizeof (struct buffer_info)); +#endif + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + src_bi->bi_parent = 
PATH_H_PPARENT (tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position (tb, h); + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + case INTERNAL_SHIFT_FROM_L_TO_S: + src_bi->tb = tb; + src_bi->bi_bh = tb->L[h]; + src_bi->bi_parent = tb->FL[h]; + src_bi->bi_position = get_left_neighbor_position (tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + + case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[h]; + src_bi->bi_parent = tb->FR[h]; + src_bi->bi_position = get_right_neighbor_position (tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_SHIFT_FROM_S_TO_R: + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position (tb, h); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_INSERT_TO_L: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position (tb, h); + break; + + case INTERNAL_INSERT_TO_S: + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + 
dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + break; + + case INTERNAL_INSERT_TO_R: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position (tb, h); + break; + + default: + reiserfs_panic (tb->tb_sb, "internal_define_dest_src_infos", "shift type is unknown (%d)", shift_mode); + } +} + + + +/* Insert count node pointers into buffer cur before position to + 1. + * Insert count items into buffer cur before position to. + * Items and node pointers are specified by inserted and bh respectively. + */ +static void internal_insert_childs (struct buffer_info * cur_bi, + int to, int count, + struct item_head * inserted, + struct buffer_head ** bh + ) +{ + struct buffer_head * cur = cur_bi->bi_bh; + struct block_head * blkh; + int nr; + struct key * ih; + struct disk_child new_dc[2]; + struct disk_child * dc; + int i; + + if (count <= 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(cur))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + if (count > 2) + reiserfs_panic (0, "internal_insert_childs", "too many children (%d) are to be inserted", count); + if (B_FREE_SPACE (cur) < count * (KEY_SIZE + DC_SIZE)) + reiserfs_panic (0, "internal_insert_childs", "no enough free space (%d), needed %d bytes", + B_FREE_SPACE (cur), count * (KEY_SIZE + DC_SIZE)); +#endif /* CONFIG_REISERFS_CHECK */ + + /* prepare space for count disk_child */ + dc = B_N_CHILD(cur,to+1); + + memmove (dc + count, dc, (nr+1-(to+1)) * DC_SIZE); + + /* copy to_be_insert disk children */ + for (i = 0; i < count; i ++) { + new_dc[i].dc_size = + cpu_to_le16 (MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE (bh[i])); + new_dc[i].dc_block_number = cpu_to_le32 (bh[i]->b_blocknr); + } + memcpy (dc, new_dc, DC_SIZE * count); + + + /* prepare space for count items */ + ih = B_N_PDELIM_KEY (cur, ((to == -1) ? 
0 : to)); + + memmove (ih + count, ih, (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); + + /* copy item headers (keys) */ + memcpy (ih, inserted, KEY_SIZE); + if ( count > 1 ) + memcpy (ih + 1, inserted + 1, KEY_SIZE); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + count); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - count * (DC_SIZE + KEY_SIZE)); + + do_balance_mark_internal_dirty (cur_bi->tb, cur,0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (cur_bi->bi_parent) { + B_N_CHILD (cur_bi->bi_parent,cur_bi->bi_position)->dc_size += count * (DC_SIZE + KEY_SIZE); + do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } + +} + + +/* Delete del_num items and node pointers from buffer cur starting from * + * the first_i'th item and first_p'th pointers respectively. 
*/ +static void internal_delete_pointers_items ( + struct buffer_info * cur_bi, + int first_p, + int first_i, + int del_num + ) +{ + struct buffer_head * cur = cur_bi->bi_bh; + int nr; + struct block_head * blkh; + struct key * key; + struct disk_child * dc; + +#ifdef CONFIG_REISERFS_CHECK + if (cur == NULL) + reiserfs_panic (0, "internal_delete_pointers_items1: buffer is 0"); + + if (del_num < 0) + reiserfs_panic (0, "internal_delete_pointers_items2", + "negative number of items (%d) can not be deleted", del_num); + + if (first_p < 0 || first_p + del_num > B_NR_ITEMS (cur) + 1 || first_i < 0) + reiserfs_panic (0, "internal_delete_pointers_items3", + "first pointer order (%d) < 0 or " + "no so many pointers (%d), only (%d) or " + "first key order %d < 0", first_p, + first_p + del_num, B_NR_ITEMS (cur) + 1, first_i); +#endif /* CONFIG_REISERFS_CHECK */ + if ( del_num == 0 ) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(cur))->blk_nr_item); + + if ( first_p == 0 && del_num == nr + 1 ) { +#ifdef CONFIG_REISERFS_CHECK + if ( first_i != 0 ) + reiserfs_panic (0, "internal_delete_pointers_items5", + "first deleted key must have order 0, not %d", first_i); +#endif /* CONFIG_REISERFS_CHECK */ + make_empty_node (cur_bi); + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (first_i + del_num > B_NR_ITEMS (cur)) { + printk("first_i = %d del_num = %d\n",first_i,del_num); + reiserfs_panic (0, "internal_delete_pointers_items4: :" + "no so many keys (%d) in the node (%b)(%z)", first_i + del_num, cur, cur); + } +#endif /* CONFIG_REISERFS_CHECK */ + + + /* deleting */ + dc = B_N_CHILD (cur, first_p); + + memmove (dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); + key = B_N_PDELIM_KEY (cur, first_i); + memmove (key, key + del_num, (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - del_num) * DC_SIZE); + + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) - del_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu 
(blkh->blk_free_space) + del_num * (KEY_SIZE + DC_SIZE)); + + do_balance_mark_internal_dirty (cur_bi->tb, cur, 0); + /*&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur); + /*&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (cur_bi->bi_parent) { + B_N_CHILD (cur_bi->bi_parent, cur_bi->bi_position)->dc_size -= del_num * (KEY_SIZE + DC_SIZE); + do_balance_mark_internal_dirty (cur_bi->tb, cur_bi->bi_parent,0); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } +} + + +/* delete n node pointers and items starting from given position */ +static void internal_delete_childs (struct buffer_info * cur_bi, + int from, int n) +{ + int i_from; + + i_from = (from == 0) ? from : from - 1; + + /* delete n pointers starting from `from' position in CUR; + delete n keys starting from 'i_from' position in CUR; + */ + internal_delete_pointers_items (cur_bi, from, i_from, n); +} + + +/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest +* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest + * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest + */ +static void internal_copy_pointers_items ( + struct buffer_info * dest_bi, + struct buffer_head * src, + int last_first, int cpy_num + ) +{ + /* ATTENTION! 
Number of node pointers in DEST is equal to number of items in DEST * + * as delimiting key have already inserted to buffer dest.*/ + struct buffer_head * dest = dest_bi->bi_bh; + int nr_dest, nr_src; + int dest_order, src_order; + struct block_head * blkh; + struct key * key; + struct disk_child * dc; + + nr_src = B_NR_ITEMS (src); + +#ifdef CONFIG_REISERFS_CHECK + if ( dest == NULL || src == NULL ) + reiserfs_panic (0, "internal_copy_pointers_items", "src (%p) or dest (%p) buffer is 0", src, dest); + + if (last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST) + reiserfs_panic (0, "internal_copy_pointers_items", + "invalid last_first parameter (%d)", last_first); + + if ( nr_src < cpy_num - 1 ) + reiserfs_panic (0, "internal_copy_pointers_items", "no so many items (%d) in src (%d)", cpy_num, nr_src); + + if ( cpy_num < 0 ) + reiserfs_panic (0, "internal_copy_pointers_items", "cpy_num less than 0 (%d)", cpy_num); + + if (cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest)) + reiserfs_panic (0, "internal_copy_pointers_items", + "cpy_num (%d) + item number in dest (%d) can not be more than MAX_NR_KEY(%d)", + cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); +#endif + + if ( cpy_num == 0 ) + return; + + /* coping */ + nr_dest = le16_to_cpu ((blkh = B_BLK_HEAD(dest))->blk_nr_item); + + /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest;*/ + /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0;*/ + (last_first == LAST_TO_FIRST) ? 
(dest_order = 0, src_order = nr_src - cpy_num + 1) : + (dest_order = nr_dest, src_order = 0); + + /* prepare space for cpy_num pointers */ + dc = B_N_CHILD (dest, dest_order); + + memmove (dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); + + /* insert pointers */ + memcpy (dc, B_N_CHILD (src, src_order), DC_SIZE * cpy_num); + + + /* prepare space for cpy_num - 1 item headers */ + key = B_N_PDELIM_KEY(dest, dest_order); + memmove (key + cpy_num - 1, key, + KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + cpy_num)); + + + /* insert headers */ + memcpy (key, B_N_PDELIM_KEY (src, src_order), KEY_SIZE * (cpy_num - 1)); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + (cpy_num - 1)); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num)); + + do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (dest); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (dest_bi->bi_parent) { + B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position)->dc_size += + KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num; + + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (dest_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } + +} + + +/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. + * Delete cpy_num - del_par items and node pointers from buffer src. + * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. + * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 
+ */ +static void internal_move_pointers_items (struct buffer_info * dest_bi, + struct buffer_info * src_bi, + int last_first, int cpy_num, int del_par) +{ + int first_pointer; + int first_item; + + internal_copy_pointers_items (dest_bi, src_bi->bi_bh, last_first, cpy_num); + + if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ + first_pointer = 0; + first_item = 0; + /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, + for key - with first_item */ + internal_delete_pointers_items (src_bi, first_pointer, first_item, cpy_num - del_par); + } else { /* shift_right occurs */ + int i, j; + + i = ( cpy_num - del_par == ( j = B_NR_ITEMS(src_bi->bi_bh)) + 1 ) ? 0 : j - cpy_num + del_par; + + internal_delete_pointers_items (src_bi, j + 1 - cpy_num + del_par, i, cpy_num - del_par); + } +} + +/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ +static void internal_insert_key (struct buffer_info * dest_bi, + int dest_position_before, /* insert key before key with n_dest number */ + struct buffer_head * src, + int src_position) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int nr; + struct block_head * blkh; + struct key * key; + +#ifdef CONFIG_REISERFS_CHECK + if (dest == NULL || src == NULL) + reiserfs_panic (0, "internal_insert_key", "sourse(%p) or dest(%p) buffer is 0", src, dest); + + if (dest_position_before < 0 || src_position < 0) + reiserfs_panic (0, "internal_insert_key", "source(%d) or dest(%d) key number less than 0", + src_position, dest_position_before); + + if (dest_position_before > B_NR_ITEMS (dest) || src_position >= B_NR_ITEMS(src)) + reiserfs_panic (0, "internal_insert_key", + "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", + dest_position_before, B_NR_ITEMS (dest), src_position, B_NR_ITEMS(src)); + + if (B_FREE_SPACE (dest) < KEY_SIZE) + reiserfs_panic (0, "internal_insert_key", + "no enough free space (%d) in dest buffer", B_FREE_SPACE (dest)); 
+#endif + + nr = le16_to_cpu ((blkh=B_BLK_HEAD(dest))->blk_nr_item); + + /* prepare space for inserting key */ + key = B_N_PDELIM_KEY (dest, dest_position_before); + memmove (key + 1, key, (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); + + /* insert key */ + memcpy (key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); + + /* Change dirt, free space, item number fields. */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + 1); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - KEY_SIZE); + + do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { + B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position)->dc_size += KEY_SIZE; + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); + } +} + + + +/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. + * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfl. + * Delete pointer_amount items and node pointers from buffer src. 
+ */ +/* this can be invoked both to shift from S to L and from R to S */ +static void internal_shift_left ( + int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + /*printk("pointer_amount = %d\n",pointer_amount);*/ + + if (pointer_amount) { + /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ + internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); + + if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { + if (src_bi.bi_position/*src->b_item_order*/ == 0) + replace_key (tb, cf, d_key_position, src_bi.bi_parent/*src->b_parent*/, 0); + } else + replace_key (tb, cf, d_key_position, src_bi.bi_bh, pointer_amount - 1); + } + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 0); + +} + +/* Insert delimiting key to L[h]. + * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. + * Delete n - 1 items and node pointers from buffer S[h]. 
+ */ +/* it always shifts from S[h] to L[h] */ +static void internal_shift1_left ( + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + if ( pointer_amount > 0 ) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ + internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); + /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]);*/ + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 1); + /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1);*/ +} + + +/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. + * Copy n node pointers and n - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfr. + * Delete n items and node pointers from buffer src. 
+ */ +static void internal_shift_right ( + int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + int nr; + + + internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + nr = B_NR_ITEMS (src_bi.bi_bh); + + if (pointer_amount > 0) { + /* insert delimiting key from common father of dest and src to dest node into position 0 */ + internal_insert_key (&dest_bi, 0, cf, d_key_position); + if (nr == pointer_amount - 1) { +#ifdef CONFIG_REISERFS_CHECK + if ( src_bi.bi_bh != PATH_H_PBUFFER (tb->tb_path, h)/*tb->S[h]*/ || dest_bi.bi_bh != tb->R[h]) + reiserfs_panic (tb->tb_sb, "internal_shift_right", "src (%p) must be == tb->S[h](%p) when it disappears", + src_bi.bi_bh, PATH_H_PBUFFER (tb->tb_path, h)); +#endif + /* when S[h] disappers replace left delemiting key as well */ + if (tb->CFL[h]) + replace_key (tb, cf, d_key_position, tb->CFL[h], tb->lkey[h]); + } else + replace_key (tb, cf, d_key_position, src_bi.bi_bh, nr - pointer_amount); + } + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 0); +} + +/* Insert delimiting key to R[h]. + * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. + * Delete n - 1 items and node pointers from buffer S[h]. 
+ */ +/* it always shift from S[h] to R[h] */ +static void internal_shift1_right ( + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ + internal_insert_key (&dest_bi, 0, cf, d_key_position); + /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]);*/ + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 1); + /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1);*/ +} + + +/* Delete insert_num node pointers together with their left items + * and balance current node.*/ +static void balance_internal_when_delete (struct tree_balance * tb, + int h, int child_pos) +{ + int insert_num; + int n; + struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); + struct buffer_info bi; + + insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); + + /* delete child-node-pointer(s) together with their left item(s) */ + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + + internal_delete_childs (&bi, child_pos, -insert_num); + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->blknum[h] > 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "tb->blknum[%d]=%d when insert_size < 0", + h, tb->blknum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + n = B_NR_ITEMS(tbSh); + + if ( tb->lnum[h] == 0 && tb->rnum[h] == 0 ) { + if ( tb->blknum[h] == 0 ) { + /* node S[h] (root of the tree) is empty now */ + struct buffer_head *new_root; + +#ifdef CONFIG_REISERFS_CHECK + if (n || B_FREE_SPACE (tbSh) != MAX_CHILD_SIZE(tbSh) - DC_SIZE) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", 
"buffer must have only 0 keys (%d)", + n); + + if (bi.bi_parent) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "root has parent (%p)", bi.bi_parent); +#endif /* CONFIG_REISERFS_CHECK */ + + /* choose a new root */ + if ( ! tb->L[h-1] || ! B_NR_ITEMS(tb->L[h-1]) ) + new_root = tb->R[h-1]; + else + new_root = tb->L[h-1]; + /* switch super block's tree root block number to the new value */ + tb->tb_sb->u.reiserfs_sb.s_rs->s_root_block = cpu_to_le32 (new_root->b_blocknr); + //tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height --; + tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height = cpu_to_le16 (SB_TREE_HEIGHT (tb->tb_sb) - 1); + + do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); + /*&&&&&&&&&&&&&&&&&&&&&&*/ + if (h > 1) + /* use check_internal if new root is an internal node */ + check_internal (new_root); + /*&&&&&&&&&&&&&&&&&&&&&&*/ + tb->tb_sb->s_dirt = 1; + + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb, tbSh); + return; + } + return; + } + + if ( tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1 ) { /* join S[h] with L[h] */ + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", + h, tb->rnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); + reiserfs_invalidate_buffer(tb, tbSh); + + return; + } + + if ( tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1 ) { /* join S[h] with R[h] */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", + h, tb->lnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); + + reiserfs_invalidate_buffer(tb,tbSh); + return; + } + + if ( tb->lnum[h] < 0 ) { /* borrow from left neighbor L[h] */ +#ifdef CONFIG_REISERFS_CHECK 
+ if ( tb->rnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->rnum[%d]==%d when borrow from L[h]", + h, tb->rnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_L_TO_S, tb, h, -tb->lnum[h]); + return; + } + + if ( tb->rnum[h] < 0 ) { /* borrow from right neighbor R[h] */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->lnum[%d]==%d when borrow from R[h]", + h, tb->lnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + internal_shift_left (INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]);*/ + return; + } + + if ( tb->lnum[h] > 0 ) { /* split S[h] into two parts and put them into neighbors */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", + "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", + h, tb->lnum[h], h, tb->rnum[h], n); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); + + reiserfs_invalidate_buffer (tb, tbSh); + + return; + } + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", + h, tb->lnum[h], h, tb->rnum[h]); +} + + +/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ +void replace_lkey ( + struct tree_balance * tb, + int h, + struct item_head * key + ) +{ +#ifdef CONFIG_REISERFS_CHECK + if (tb->L[h] == NULL || tb->CFL[h] == NULL) + reiserfs_panic (tb->tb_sb, "replace_lkey: 12255: " + "L[h](%p) and CFL[h](%p) must exist in 
replace_lkey", tb->L[h], tb->CFL[h]); +#endif + + if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) + return; + + memcpy (B_N_PDELIM_KEY(tb->CFL[h],tb->lkey[h]), key, KEY_SIZE); + + do_balance_mark_internal_dirty (tb, tb->CFL[h],0); +} + + +/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ +void replace_rkey ( + struct tree_balance * tb, + int h, + struct item_head * key + ) +{ +#ifdef CONFIG_REISERFS_CHECK + if (tb->R[h] == NULL || tb->CFR[h] == NULL) + reiserfs_panic (tb->tb_sb, "replace_rkey: 12260: " + "R[h](%p) and CFR[h](%p) must exist in replace_rkey", tb->R[h], tb->CFR[h]); + + if (B_NR_ITEMS(tb->R[h]) == 0) + reiserfs_panic (tb->tb_sb, "replace_rkey: 12265: " + "R[h] can not be empty if it exists (item number=%d)", B_NR_ITEMS(tb->R[h])); +#endif + + memcpy (B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]), key, KEY_SIZE); + + do_balance_mark_internal_dirty (tb, tb->CFR[h], 0); +} + + +int balance_internal (struct tree_balance * tb, /* tree_balance structure */ + int h, /* level of the tree */ + int child_pos, + struct item_head * insert_key, /* key for insertion on higher level */ + struct buffer_head ** insert_ptr /* node for insertion on higher level*/ + ) + /* if inserting/pasting + { + child_pos is the position of the node-pointer in S[h] that * + pointed to S[h-1] before balancing of the h-1 level; * + this means that new pointers and items must be inserted AFTER * + child_pos + } + else + { + it is the position of the leftmost pointer that must be deleted (together with + its corresponding key to the left of the pointer) + as a result of the previous level's balancing. 
+ } +*/ +{ + struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); + struct buffer_info bi; + int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ + int insert_num, n, k; + struct buffer_head * S_new; + struct item_head new_insert_key; + struct buffer_head * new_insert_ptr = NULL; + struct item_head * new_insert_key_addr = insert_key; + +#ifdef CONFIG_REISERFS_CHECK + if ( h < 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal", "h (%d) can not be < 1 on internal level", h); +#endif /* CONFIG_REISERFS_CHECK */ + + order = ( tbSh ) ? PATH_H_POSITION (tb->tb_path, h + 1)/*tb->S[h]->b_item_order*/ : 0; + + /* Using insert_size[h] calculate the number insert_num of items + that must be inserted to or deleted from S[h]. */ + insert_num = tb->insert_size[h]/((int)(KEY_SIZE + DC_SIZE)); + + /* Check whether insert_num is proper **/ +#ifdef CONFIG_REISERFS_CHECK + if ( insert_num < -2 || insert_num > 2 ) + reiserfs_panic (tb->tb_sb, "balance_internal", + "incorrect number of items inserted to the internal node (%d)", insert_num); + + if ( h > 1 && (insert_num > 1 || insert_num < -1) ) + reiserfs_panic (tb->tb_sb, "balance_internal", + "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", + insert_num, h); +#endif /* CONFIG_REISERFS_CHECK */ + + /* Make balance in case insert_num < 0 */ + if ( insert_num < 0 ) { + balance_internal_when_delete (tb, h, child_pos); + return order; + } + + k = 0; + if ( tb->lnum[h] > 0 ) { + /* shift lnum[h] items from S[h] to the left neighbor L[h]. 
+ check how many of new items fall into L[h] or CFL[h] after + shifting */ + n = B_NR_ITEMS (tb->L[h]); /* number of items in L[h] */ + if ( tb->lnum[h] <= child_pos ) { + /* new items don't fall into L[h] or CFL[h] */ + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); + /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]);*/ + child_pos -= tb->lnum[h]; + } else if ( tb->lnum[h] > child_pos + insert_num ) { + /* all new items fall into L[h] */ + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h] - insert_num); + /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, + tb->lnum[h]-insert_num); + */ + /* insert insert_num keys and node-pointers into L[h] */ + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position (tb, h); + internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next*/ n + child_pos + 1, + insert_num,insert_key,insert_ptr); + + insert_num = 0; + } else { + struct disk_child * dc; + + /* some items fall into L[h] or CFL[h], but some don't fall */ + internal_shift1_left(tb,h,child_pos+1); + /* calculate number of new items that fall into L[h] */ + k = tb->lnum[h] - child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position (tb, h); + internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next,*/ n + child_pos + 1,k, + insert_key,insert_ptr); + + replace_lkey(tb,h,insert_key + k); + + /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ + dc = B_N_CHILD(tbSh, 0); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[k]) - B_FREE_SPACE (insert_ptr[k])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[k]->b_blocknr); + + do_balance_mark_internal_dirty (tb, tbSh, 0); + + k++; + insert_key += k; + insert_ptr += k; + insert_num -= k; + child_pos = 0; + } + } /* tb->lnum[h] > 0 */ + + if ( tb->rnum[h] > 0 ) { + /*shift rnum[h] items from S[h] to the right 
neighbor R[h]*/ + /* check how many of new items fall into R or CFR after shifting */ + n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ + if ( n - tb->rnum[h] >= child_pos ) + /* new items fall into S[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); + else + if ( n + insert_num - tb->rnum[h] < child_pos ) + { + /* all new items fall into R[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], + tb->rnum[h] - insert_num);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h] - insert_num); + + /* insert insert_num keys and node-pointers into R[h] */ + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position (tb, h); + internal_insert_childs (&bi, /*tb->R[h],tb->S[h-1]->b_next*/ child_pos - n - insert_num + tb->rnum[h] - 1, + insert_num,insert_key,insert_ptr); + insert_num = 0; + } + else + { + struct disk_child * dc; + + /* one of the items falls into CFR[h] */ + internal_shift1_right(tb,h,n - child_pos + 1); + /* calculate number of new items that fall into R[h] */ + k = tb->rnum[h] - n + child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position (tb, h); + internal_insert_childs (&bi, /*tb->R[h], tb->R[h]->b_child,*/ 0, k, insert_key + 1, insert_ptr + 1); + + replace_rkey(tb,h,insert_key + insert_num - k - 1); + + /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1]*/ + dc = B_N_CHILD(tb->R[h], 0); + dc->dc_size = + cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - + B_FREE_SPACE (insert_ptr[insert_num-k-1])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[insert_num-k-1]->b_blocknr); + + do_balance_mark_internal_dirty (tb, tb->R[h],0); + + insert_num -= (k + 1); + } + } + + /** Fill new node that appears instead of S[h] **/ +#ifdef CONFIG_REISERFS_CHECK + if 
( tb->blknum[h] > 2 ) + reiserfs_panic(0, "balance_internal", "blknum can not be > 2 for internal level"); + if ( tb->blknum[h] < 0 ) + reiserfs_panic(0, "balance_internal", "blknum can not be < 0"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( ! tb->blknum[h] ) + { /* node S[h] is empty now */ +#ifdef CONFIG_REISERFS_CHECK + if ( ! tbSh ) + reiserfs_panic(0,"balance_internal", "S[h] is equal NULL"); +#endif /* CONFIG_REISERFS_CHECK */ + + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb,tbSh); + return order; + } + + if ( ! tbSh ) { + /* create new root */ + struct disk_child * dc; + struct buffer_head * tbSh_1 = PATH_H_PBUFFER (tb->tb_path, h - 1); + + + if ( tb->blknum[h] != 1 ) + reiserfs_panic(0, "balance_internal", "One new node required for creating the new root"); + /* S[h] = empty buffer from the list FEB. */ + tbSh = get_FEB (tb); + B_BLK_HEAD(tbSh)->blk_level = cpu_to_le16 (h + 1); + + /* Put the unique node-pointer to S[h] that points to S[h-1]. */ + + dc = B_N_CHILD(tbSh, 0); + dc->dc_block_number = cpu_to_le32 (tbSh_1->b_blocknr); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE (tbSh_1) - B_FREE_SPACE (tbSh_1)); + + tb->insert_size[h] -= DC_SIZE; + B_BLK_HEAD(tbSh)->blk_free_space = cpu_to_le16 (B_FREE_SPACE (tbSh) - DC_SIZE); + + do_balance_mark_internal_dirty (tb, tbSh, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (tbSh); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + /* put new root into path structure */ + PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = tbSh; + + /* Change root in structure super block. 
*/ + tb->tb_sb->u.reiserfs_sb.s_rs->s_root_block = cpu_to_le32 (tbSh->b_blocknr); + tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height = cpu_to_le16 (SB_TREE_HEIGHT (tb->tb_sb) + 1); + do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); + tb->tb_sb->s_dirt = 1; + } + + if ( tb->blknum[h] == 2 ) { + int snum; + struct buffer_info dest_bi, src_bi; + + + /* S_new = free buffer from list FEB */ + S_new = get_FEB(tb); + + B_BLK_HEAD(S_new)->blk_level = cpu_to_le16 (h + 1); + + dest_bi.tb = tb; + dest_bi.bi_bh = S_new; + dest_bi.bi_parent = 0; + dest_bi.bi_position = 0; + src_bi.tb = tb; + src_bi.bi_bh = tbSh; + src_bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + src_bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + + n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ + snum = (insert_num + n + 1)/2; + if ( n - snum >= child_pos ) { + /* new items don't fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n - snum)'th key in S[h] */ + memcpy (&new_insert_key,B_N_PDELIM_KEY(tbSh,n - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum, 0); + /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0);*/ + } else if ( n + insert_num - snum < child_pos ) { + /* all new items fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ + memcpy(&new_insert_key,B_N_PDELIM_KEY(tbSh,n + insert_num - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum - insert_num, 0); + /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0);*/ + + /* insert insert_num keys and node-pointers into S_new */ + internal_insert_childs (&dest_bi, /*S_new,tb->S[h-1]->b_next,*/child_pos - n - insert_num + snum - 1, + insert_num,insert_key,insert_ptr); + + insert_num = 0; + } else { + struct disk_child * dc; + 
+ /* some items fall into S_new, but some don't fall */ + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, n - child_pos + 1, 1); + /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1);*/ + /* calculate number of new items that fall into S_new */ + k = snum - n + child_pos - 1; + + internal_insert_childs (&dest_bi, /*S_new,*/ 0, k, insert_key + 1, insert_ptr+1); + + /* new_insert_key = insert_key[insert_num - k - 1] */ + memcpy(&new_insert_key,insert_key + insert_num - k - 1, + KEY_SIZE); + /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ + + dc = B_N_CHILD(S_new,0); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - + B_FREE_SPACE(insert_ptr[insert_num-k-1])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[insert_num-k-1]->b_blocknr); + + do_balance_mark_internal_dirty (tb, S_new,0); + + insert_num -= (k + 1); + } + /* new_insert_ptr = node_pointer to S_new */ + new_insert_ptr = S_new; + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_locked(S_new) || atomic_read (&(S_new->b_count)) != 1) + if (buffer_locked(S_new) || atomic_read(&(S_new->b_count)) > 2 || + !(buffer_journaled(S_new) || buffer_journal_dirty(S_new))) { + reiserfs_panic (tb->tb_sb, "cm-00001: balance_internal: bad S_new (%b)", S_new); + } +#endif /* CONFIG_REISERFS_CHECK */ + + // S_new is released in unfix_nodes + } + + n = B_NR_ITEMS (tbSh); /*number of items in S[h] */ + +#ifdef REISERFS_FSCK + if ( -1 <= child_pos && child_pos <= n && insert_num > 0 ) { +#else + if ( 0 <= child_pos && child_pos <= n && insert_num > 0 ) { +#endif + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); +#ifdef REISERFS_FSCK + if (child_pos == -1) { + /* this is a little different from original do_balance: + here we insert the minimal keys in the tree, that has never happened when file system works */ + if (tb->CFL[h-1] || 
insert_num != 1 || h != 1) + die ("balance_internal: invalid child_pos"); +/* insert_child (tb->S[h], tb->S[h-1], child_pos, insert_num, B_N_ITEM_HEAD(tb->S[0],0), insert_ptr);*/ + internal_insert_childs (&bi, child_pos, insert_num, B_N_PITEM_HEAD (PATH_PLAST_BUFFER (tb->tb_path), 0), insert_ptr); + } else +#endif + internal_insert_childs ( + &bi,/*tbSh,*/ + /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next,*/ + child_pos,insert_num,insert_key,insert_ptr + ); + } + + + memcpy (new_insert_key_addr,&new_insert_key,KEY_SIZE); + insert_ptr[0] = new_insert_ptr; + + return order; + } + + + diff -u -r --new-file linux/fs/reiserfs/inode.c v2.4.0-test8/linux/fs/reiserfs/inode.c --- linux/fs/reiserfs/inode.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/inode.c Thu Sep 21 12:25:29 2000 @@ -0,0 +1,1753 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> +#include <linux/locks.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> + +#else + +#include "nokernel.h" + +#endif + +/* args for the create parameter of reiserfs_get_block */ +#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ +#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ +#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ +#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// +void reiserfs_delete_inode (struct inode * inode) +{ + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2; + int windex ; + struct reiserfs_transaction_handle th ; + + + lock_kernel() ; + + /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ + if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + down (&inode->i_sem); + + journal_begin(&th, inode->i_sb, jbegin_count) ; + windex = push_journal_writer("delete_inode") ; + + reiserfs_delete_object (&th, inode); + reiserfs_remove_page_from_flush_list(&th, inode) ; + pop_journal_writer(windex) ; + reiserfs_release_objectid (&th, inode->i_ino); + + journal_end(&th, inode->i_sb, jbegin_count) ; + + up (&inode->i_sem); + } else { + /* no object items are in the tree */ + ; + } + clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ + unlock_kernel() ; +} + +#if 0 +static void copy_data_blocks_to_inode (struct inode * inode, struct item_head * ih, __u32 * ind_item) +{ + int first_log_block = (ih->ih_key.k_offset - 1) / inode->i_sb->s_blocksize; /* first log block addressed by indirect item */ + int i, j; + + for (i = first_log_block, j = 0; i < REISERFS_N_BLOCKS && j < I_UNFM_NUM (ih); i ++, j ++) { +#ifdef CONFIG_REISERFS_CHECK + if (inode->u.reiserfs_i.i_data [i] && inode->u.reiserfs_i.i_data [i] != ind_item [j]) + reiserfs_panic (inode->i_sb, "vs-13000: copy_data_blocks_to_inode: " + "log block %d, data block %d is seet and doe not match to unfmptr %d", + i, inode->u.reiserfs_i.i_data [i], ind_item [j]); +#endif + inode->u.reiserfs_i.i_data [i] = ind_item [j]; + } +} +#endif/*0*/ + + + +static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, + loff_t offset, int type, int length) +{ + key->version = version; + + key->on_disk_key.k_dir_id = dirid; + key->on_disk_key.k_objectid = objectid; + set_cpu_key_k_offset (key, offset); + set_cpu_key_k_type (key, type); + key->key_length = length; +} + + +/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set + offset and type of key */ +void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset, + int type, int length) +{ + _make_cpu_key (key, 
inode_items_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), + le32_to_cpu (INODE_PKEY (inode)->k_objectid), + offset, type, length); +} + + +// +// when key is 0, do not set version and short key +// +inline void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version, + loff_t offset, int type, int length, int entry_count/*or ih_free_space*/) +{ + if (key) { + ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); + ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); + } + ih->ih_version = cpu_to_le16 (version); + set_le_ih_k_offset (ih, offset); + set_le_ih_k_type (ih, type); + ih->ih_item_len = cpu_to_le16 (length); + /* set_ih_free_space (ih, 0);*/ + // for directory items it is entry count, for directs and stat + // datas - 0xffff, for indirects - 0 + ih->u.ih_entry_count = cpu_to_le16 (entry_count); +} + + +// +// FIXME: we might cache recently accessed indirect item (or at least +// first 15 pointers just like ext2 does +// +static int got_from_inode (struct inode * inode, b_blocknr_t * pblock) +{ + return 0; +} + +/* people who call journal_begin with a page locked must call this +** BEFORE calling journal_begin +*/ +static int prevent_flush_page_lock(struct page *page, + struct inode *inode) { + struct reiserfs_page_list *pl ; + struct super_block *s = inode->i_sb ; + /* we don't care if the inode has a stale pointer from an old + ** transaction + */ + if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) { + return 0 ; + } + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl && pl->page == page) { + pl->do_not_lock = 1 ; + } + /* this last part is really important. The address space operations have + ** the page locked before they call the journal functions. So it is possible + ** for one process to be waiting in flush_pages_before_commit for a + ** page, then for the process with the page locked to call journal_begin. 
+ ** + ** We'll deadlock because the process flushing pages will never notice + ** the process with the page locked has called prevent_flush_page_lock. + ** So, we wake up the page waiters, even though the page is still locked. + ** The process waiting in flush_pages_before_commit must check the + ** pl->do_not_lock flag, and stop trying to lock the page. + */ + wake_up(&page->wait) ; + return 0 ; + +} +/* people who call journal_end with a page locked must call this +** AFTER calling journal_end +*/ +static int allow_flush_page_lock(struct page *page, + struct inode *inode) { + + struct reiserfs_page_list *pl ; + struct super_block *s = inode->i_sb ; + /* we don't care if the inode has a stale pointer from an old + ** transaction + */ + if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) { + return 0 ; + } + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl && pl->page == page) { + pl->do_not_lock = 0 ; + } + return 0 ; + +} + +/* If this page has a file tail in it, and +** it was read in by get_block_create_0, the page data is valid, +** but tail is still sitting in a direct item, and we can't write to +** it. So, look through this page, and check all the mapped buffers +** to make sure they have valid block numbers. Any that don't need +** to be unmapped, so that block_prepare_write will correctly call +** reiserfs_get_block to convert the tail into an unformatted node +*/ +static inline void fix_tail_page_for_writing(struct page *page) { + struct buffer_head *head, *next, *bh ; + + if (page && page->buffers) { + head = page->buffers ; + bh = head ; + do { + next = bh->b_this_page ; + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + reiserfs_unmap_buffer(bh) ; + } + bh = next ; + } while (bh != head) ; + } +} + + + + +/* we need to allocate a block for new unformatted node. Try to figure out + what point in bitmap reiserfs_new_blocknrs should start from. 
*/ +static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih, + __u32 * item, int pos_in_item) +{ + if (!is_indirect_le_ih (ih)) + /* something more complicated could be here */ + return bh->b_blocknr; + + /* for indirect item: go to left and look for the first non-hole entry in + the indirect item */ + if (pos_in_item == I_UNFM_NUM (ih)) + pos_in_item --; + while (pos_in_item >= 0) { + if (item [pos_in_item]) + return item [pos_in_item]; + pos_in_item --; + } + return bh->b_blocknr; +} + + +/* reiserfs_get_block does not need to allocate a block only if it has been + done already or non-hole position has been found in the indirect item */ +static inline int allocation_needed (int retval, b_blocknr_t allocated, + struct item_head * ih, + __u32 * item, int pos_in_item) +{ + if (allocated) + return 0; + if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && item[pos_in_item]) + return 0; + return 1; +} + +static inline int indirect_item_found (int retval, struct item_head * ih) +{ + return (retval == POSITION_FOUND) && is_indirect_le_ih (ih); +} + + +static inline void set_block_dev_mapped (struct buffer_head * bh, + b_blocknr_t block, struct inode * inode) +{ + bh->b_dev = inode->i_dev; + bh->b_blocknr = block; + bh->b_state |= (1UL << BH_Mapped); +} + + +// +// files which were created in the earlier version can not be longer, +// than 2 gb +// +int file_capable (struct inode * inode, long block) +{ + if (inode_items_version (inode) != ITEM_VERSION_1 || // it is new file. 
+ block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb + return 1; + + return 0; +} + +/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th, + struct inode *inode, struct path *path) { + struct super_block *s = th->t_super ; + int len = th->t_blocks_allocated ; + + pathrelse(path) ; + reiserfs_update_sd(th, inode) ; + journal_end(th, s, len) ; + journal_begin(th, s, len) ; +} + +// it is called by get_block when create == 0. Returns block number +// for 'block'-th logical block of file. When it hits direct item it +// returns 0 (being called from bmap) or read direct item into piece +// of page (bh_result) +static int _get_block_create_0 (struct inode * inode, long block, + struct buffer_head * bh_result, + int args) +{ + INITIALIZE_PATH (path); + struct cpu_key key; + struct buffer_head * bh; + struct item_head * ih; + int blocknr; + char * p; + int chars; + int ret ; + + + if (got_from_inode (inode, &bh_result->b_blocknr)) { + bh_result->b_dev = inode->i_dev; + //bh_result->b_blocknr = block; + bh_result->b_state |= (1UL << BH_Mapped); + return 0; + } + + // prepare the key to look for the 'block'-th block of file + make_cpu_key (&key, inode, + (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); + + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { + pathrelse (&path); + return -ENOENT; + } + + // + bh = get_bh (&path); + ih = get_ih (&path); + if (is_indirect_le_ih (ih)) { + __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); + + /* FIXME: here we could cache indirect item or part of it in + the inode to avoid search_by_key in case of subsequent + access to file */ + blocknr = le32_to_cpu (ind_item [path.pos_in_item]); + ret = 0 ; + if (blocknr) { + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = blocknr; + bh_result->b_state |= (1UL << BH_Mapped); + } else if ((args & GET_BLOCK_NO_HOLE)) { + ret = -ENOENT ; + } + pathrelse (&path); + return ret ; + } + + + 
// requested data are in direct item(s) + if (!(args & GET_BLOCK_READ_DIRECT)) { + // we are called by bmap. FIXME: we can not map block of file + // when it is stored in direct item(s) + pathrelse (&path); + return -ENOENT; + } + + // read file tail into part of page + p = bh_result->b_data; + memset (p, 0, inode->i_sb->s_blocksize); + do { + if (!is_direct_le_ih (ih)) + BUG (); + chars = le16_to_cpu (ih->ih_item_len) - path.pos_in_item; + memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); + p += chars; + + if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) + // we done, if read direct item is not the last item of + // node FIXME: we could try to check right delimiting key + // to see whether direct item continues in the right + // neighbor or rely on i_size + break; + + // update key to look for the next piece + set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) + // we read something from tail, even if now we got IO_ERROR + break; + bh = get_bh (&path); + ih = get_ih (&path); + } while (1); + + pathrelse (&path); + + // FIXME: b_blocknr == 0 here. but b_data contains correct data + // from tail. ll_rw_block will skip uptodate buffers + bh_result->b_blocknr = 0 ; + bh_result->b_dev = inode->i_dev; + mark_buffer_uptodate (bh_result, 1); + bh_result->b_state |= (1UL << BH_Mapped); + + return 0; +} + + +// this is called to create file map. So, _get_block_create_0 will not +// read direct item +int reiserfs_bmap (struct inode * inode, long block, + struct buffer_head * bh_result, int create) +{ + if (!file_capable (inode, block)) + return -EFBIG; + + lock_kernel() ; + /* do not read the direct item */ + _get_block_create_0 (inode, block, bh_result, 0) ; + unlock_kernel() ; + return 0; +} + +/* special version of get_block that is only used by grab_tail_page right +** now. 
It is sent to block_prepare_write, and when you try to get a +** block past the end of the file (or a block from a hole) it returns +** -ENOENT instead of a valid buffer. block_prepare_write expects to +** be able to do i/o on the buffers returned, unless an error value +** is also returned. +** +** So, this allows block_prepare_write to be used for reading a single block +** in a page. Where it does not produce a valid page for holes, or past the +** end of the file. This turns out to be exactly what we need for reading +** tails for conversion. +*/ +static int reiserfs_get_block_create_0 (struct inode * inode, long block, + struct buffer_head * bh_result, int create) { + return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; +} + +/* +** helper function for when reiserfs_get_block is called for a hole +** but the file tail is still in a direct item +** bh_result is the buffer head for the hole +** tail_offset is the offset of the start of the tail in the file +** +** This calls prepare_write, which will start a new transaction +** you should not be in a transaction, or have any paths held when you +** call this. 
+*/ +static int convert_tail_for_hole(struct inode *inode, + struct buffer_head *bh_result, + loff_t tail_offset) { + unsigned long index ; + unsigned long tail_end ; + unsigned long tail_start ; + struct page * tail_page ; + struct page * hole_page = bh_result->b_page ; + int retval = 0 ; + + /* always try to read until the end of the block */ + tail_start = tail_offset & (bh_result->b_size - 1) ; + tail_end = bh_result->b_size ; + + if (tail_start != 1) + return -EIO ; + + index = tail_offset >> PAGE_CACHE_SHIFT ; + if (index != hole_page->index) { + tail_page = grab_cache_page(inode->i_mapping, index) ; + retval = PTR_ERR(tail_page) ; + if (IS_ERR(tail_page)) { + goto out ; + } + } else { + tail_page = hole_page ; + } + + /* we don't have to make sure the conversion did not happen while + ** we were locking the page because anyone that could convert + ** must first take i_sem. + ** + ** We must fix the tail page for writing because it might have buffers + ** that are mapped, but have a block number of 0. This indicates tail + ** data that has been read directly into the page, and block_prepare_write + ** won't trigger a get_block in this case. 
+ */ + fix_tail_page_for_writing(tail_page) ; + retval = block_prepare_write(tail_page, tail_start, tail_end, + reiserfs_get_block) ; + if (retval) + goto unlock ; + + /* tail conversion might change the data in the page */ + flush_dcache_page(tail_page) ; + + retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ; + +unlock: + if (tail_page != hole_page) { + UnlockPage(tail_page) ; + page_cache_release(tail_page) ; + } +out: + return retval ; +} + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// +int reiserfs_get_block (struct inode * inode, long block, + struct buffer_head * bh_result, int create) +{ + int repeat, retval; + unsigned long tag; + b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long + INITIALIZE_PATH(path); + int pos_in_item; + struct cpu_key key; + struct buffer_head * bh, * unbh = 0; + struct item_head * ih, tmp_ih; + __u32 * item; + int done; + int fs_gen; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 ; + int version; + int transaction_started = 0 ; + loff_t new_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; + + lock_kernel() ; + th.t_trans_id = 0 ; + version = inode_items_version (inode); + + if (!file_capable (inode, block)) { + unlock_kernel() ; + return -EFBIG; + } + + /* if !create, we aren't changing the FS, so we don't need to + ** log anything, so we don't need to start a transaction + */ + if (!(create & GET_BLOCK_CREATE)) { + int ret ; + /* find number of block-th logical block of the file */ + ret = _get_block_create_0 (inode, block, bh_result, + create | GET_BLOCK_READ_DIRECT) ; + unlock_kernel() ; + return ret; + } + + if (block < 0) { + unlock_kernel(); + return -EIO; + } + + prevent_flush_page_lock(bh_result->b_page, inode) ; + inode->u.reiserfs_i.i_pack_on_close = 1 ; + + windex = push_journal_writer("reiserfs_get_block") ; + + /* set the key of the first byte in the 
'block'-th block of file */ + make_cpu_key (&key, inode, + (loff_t)block * inode->i_sb->s_blocksize + 1, // k_offset + TYPE_ANY, 3/*key length*/); + if ((new_offset + inode->i_sb->s_blocksize) >= inode->i_size) { + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + } + research: + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + + bh = get_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + + fs_gen = get_generation (inode->i_sb); + copy_item_head (&tmp_ih, ih); + + if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { + /* we have to allocate block for the unformatted node */ + tag = find_tag (bh, ih, item, pos_in_item); + if (!transaction_started) { + pathrelse(&path) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + goto research ; + } + +#ifdef REISERFS_PREALLOCATE + repeat = reiserfs_new_unf_blocknrs2 (&th, inode, &allocated_block_nr, tag); +#else + repeat = reiserfs_new_unf_blocknrs (&th, &allocated_block_nr, tag); +#endif + + if (repeat == NO_DISK_SPACE) { + /* restart the transaction to give the journal a chance to free + ** some blocks. releases the path, so we have to go back to + ** research if we succeed on the second try + */ + restart_transaction(&th, inode, &path) ; +#ifdef REISERFS_PREALLOCATE + repeat = reiserfs_new_unf_blocknrs2 (&th, inode, &allocated_block_nr, tag); +#else + repeat = reiserfs_new_unf_blocknrs (&th, &allocated_block_nr, tag); +#endif + + if (repeat != NO_DISK_SPACE) { + goto research ; + } + retval = -ENOSPC; + goto failure; + } + + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } + } + + if (indirect_item_found (retval, ih)) { + /* 'block'-th block is in the file already (there is + corresponding cell in some indirect item). 
But it may be + zero unformatted node pointer (hole) */ + if (!item[pos_in_item]) { + /* use allocated block to plug the hole */ + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + bh_result->b_state |= (1UL << BH_New); + item[pos_in_item] = cpu_to_le32 (allocated_block_nr); + journal_mark_dirty (&th, inode->i_sb, bh); + inode->i_blocks += (inode->i_sb->s_blocksize / 512) ; + reiserfs_update_sd(&th, inode) ; + } + set_block_dev_mapped(bh_result, le32_to_cpu (item[pos_in_item]), inode); + pathrelse (&path); + pop_journal_writer(windex) ; + if (transaction_started) + journal_end(&th, inode->i_sb, jbegin_count) ; + + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + + /* the item was found, so new blocks were not added to the file + ** there is no need to make sure the inode is updated with this + ** transaction + */ + return 0; + } + + if (!transaction_started) { + /* if we don't pathrelse, we could vs-3050 on the buffer if + ** someone is waiting for it (they can't finish until the buffer + ** is released, we can start a new transaction until they finish) + */ + pathrelse(&path) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + goto research; + } + + /* desired position is not found or is in the direct item. We have + to append file with holes up to 'block'-th block converting + direct items to indirect one if necessary */ + done = 0; + do { + if (is_statdata_le_ih (ih)) { + __u32 unp = 0; + struct cpu_key tmp_key; + + /* indirect item has to be inserted */ + make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, + UNFM_P_SIZE, 0/* free_space */); + + if (cpu_key_k_offset (&key) == 1) { + /* we are going to add 'block'-th block to the file. 
Use
+	 allocated block for that */
+	      unp = cpu_to_le32 (allocated_block_nr);
+	      set_block_dev_mapped (bh_result, allocated_block_nr, inode);
+	      bh_result->b_state |= (1UL << BH_New);
+	      done = 1;
+	  }
+	  tmp_key = key;  // ;)
+	  set_cpu_key_k_offset (&tmp_key, 1);
+	  PATH_LAST_POSITION(&path) ++;
+    
+	  retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
+	  if (retval) {
+	      reiserfs_free_block (&th, allocated_block_nr);
+
+#ifdef REISERFS_PREALLOCATE
+	      reiserfs_discard_prealloc (&th, inode);
+#endif
+	      goto failure; // retval == -ENOSPC or -EIO or -EEXIST
+	  }
+	  if (unp)
+	      inode->i_blocks += inode->i_sb->s_blocksize / 512;
+	  //mark_tail_converted (inode);
+      } else if (is_direct_le_ih (ih)) {
+	  /* direct item has to be converted */
+	  loff_t tail_offset;
+
+	  tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
+	  if (tail_offset == cpu_key_k_offset (&key)) {
+	      /* direct item we just found fits into block we have
+                 to map. Convert it into unformatted node: use
+                 bh_result for the conversion */
+	      set_block_dev_mapped (bh_result, allocated_block_nr, inode);
+	      unbh = bh_result;
+	      done = 1;
+	  } else {
+	      /* we have to pad file tail stored in direct item(s)
+		 up to block size and convert it to unformatted
+		 node. FIXME: this should also get into page cache */
+
+	      pathrelse(&path) ;
+	      journal_end(&th, inode->i_sb, jbegin_count) ;
+	      transaction_started = 0 ;
+
+	      retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
+	      if (retval) {
+		  printk("clm-6004: covert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
+		  if (allocated_block_nr)
+		      reiserfs_free_block (&th, allocated_block_nr);
+		  goto failure ;
+	      }
+	      goto research ;
+	  }
+	  retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
+	  /* it is important the mark_buffer_uptodate is done after
+	  ** the direct2indirect.  The buffer might contain valid
+	  ** data newer than the data on disk (read by readpage, changed,
+	  ** and then sent here by writepage).  
direct2indirect needs + ** to know if unbh was already up to date, so it can decide + ** if the data in unbh needs to be replaced with data from + ** the disk + */ + mark_buffer_uptodate (unbh, 1); + if (retval) { + reiserfs_free_block (&th, allocated_block_nr); + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + goto failure; + } + /* we've converted the tail, so we must + ** flush unbh before the transaction commits + */ + reiserfs_add_page_to_flush_list(&th, inode, unbh) ; + + //inode->i_blocks += inode->i_sb->s_blocksize / 512; + //mark_tail_converted (inode); + } else { + /* append indirect item with holes if needed, when appending + pointer to 'block'-th block use block, which is already + allocated */ + struct cpu_key tmp_key; + struct unfm_nodeinfo un = {0, 0}; + +#ifdef CONFIG_REISERFS_CHECK + if (pos_in_item != le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE) + reiserfs_panic (inode->i_sb, "vs-: reiserfs_get_block: " + "invalid position for append"); +#endif + /* indirect item has to be appended, set up key of that position */ + make_cpu_key (&tmp_key, inode, + le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), + //pos_in_item * inode->i_sb->s_blocksize, + TYPE_INDIRECT, 3);// key type is unimportant + + if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) { + /* we are going to add target block to the file. 
Use allocated + block for that */ + un.unfm_nodenum = cpu_to_le32 (allocated_block_nr); + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + bh_result->b_state |= (1UL << BH_New); + done = 1; + } else { + /* paste hole to the indirect item */ + } + retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE); + if (retval) { + reiserfs_free_block (&th, allocated_block_nr); + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + goto failure; + } + if (un.unfm_nodenum) + inode->i_blocks += inode->i_sb->s_blocksize / 512; + //mark_tail_converted (inode); + } + + if (done == 1) + break; + + /* this loop could log more blocks than we had originally asked + ** for. So, we have to allow the transaction to end if it is + ** too big or too full. Update the inode so things are + ** consistent if we crash before the function returns + ** + ** release the path so that anybody waiting on the path before + ** ending their transaction will be able to continue. 
+ */ + if (journal_transaction_should_end(&th, th.t_blocks_allocated)) { + restart_transaction(&th, inode, &path) ; + } + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + if (retval == POSITION_FOUND) { + reiserfs_warning ("vs-: reiserfs_get_block: " + "%k should not be found", &key); + retval = -EEXIST; + pathrelse(&path) ; + goto failure; + } + bh = get_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + } while (1); + + + retval = 0; + reiserfs_check_path(&path) ; + + failure: + if (transaction_started) { + reiserfs_update_sd(&th, inode) ; + journal_end(&th, inode->i_sb, jbegin_count) ; + } + pop_journal_writer(windex) ; + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + reiserfs_check_path(&path) ; + return retval; +} + + +// +// BAD: new directories have stat data of new type and all other items +// of old type. Version stored in the inode says about body items, so +// in update_stat_data we can not rely on inode, but have to check +// item version directly +// + +// called by read_inode +static void init_inode (struct inode * inode, struct path * path) +{ + struct buffer_head * bh; + struct item_head * ih; + __u32 rdev; + //int version = ITEM_VERSION_1; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + + + copy_key (INODE_PKEY (inode), &(ih->ih_key)); + inode->i_generation = INODE_PKEY (inode)->k_dir_id; + inode->i_blksize = PAGE_SIZE; + + if (stat_data_v1 (ih)) { + struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); + unsigned long blocks; + + inode_items_version (inode) = ITEM_VERSION_1; + inode->i_mode = le16_to_cpu (sd->sd_mode); + inode->i_nlink = le16_to_cpu (sd->sd_nlink); + inode->i_uid = le16_to_cpu (sd->sd_uid); + inode->i_gid = le16_to_cpu (sd->sd_gid); + inode->i_size = le32_to_cpu (sd->sd_size); + inode->i_atime = le32_to_cpu (sd->sd_atime); + inode->i_mtime = 
le32_to_cpu (sd->sd_mtime);
+	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
+
+	inode->i_blocks = le32_to_cpu (sd->u.sd_blocks);
+	blocks = (inode->i_size + 511) >> 9;
+	blocks = _ROUND_UP (blocks, inode->i_blksize >> 9);
+	if (inode->i_blocks > blocks) {
+	    // there was a bug in <=3.5.23 when i_blocks could take negative
+	    // values. Starting from 3.5.17 this value could even be stored in
+	    // stat data. For such files we set i_blocks based on file
+	    // size. Just 2 notes: this can be wrong for sparse files. On-disk value will be
+	    // only updated if file's inode will ever change
+	    inode->i_blocks = blocks;
+	}
+
+	rdev = le32_to_cpu (sd->u.sd_rdev);
+	inode->u.reiserfs_i.i_first_direct_byte = le32_to_cpu (sd->sd_first_direct_byte);
+    } else {
+	// new stat data found, but object may have old items
+	// (directories and symlinks)
+	struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
+
+	/* both old and new directories have old keys */
+	//version = (S_ISDIR (sd->sd_mode) ? ITEM_VERSION_1 : ITEM_VERSION_2);
+	if (S_ISDIR (sd->sd_mode) || S_ISLNK (sd->sd_mode))
+	    inode_items_version (inode) = ITEM_VERSION_1;
+	else
+	    inode_items_version (inode) = ITEM_VERSION_2;
+	inode->i_mode = le16_to_cpu (sd->sd_mode);
+	inode->i_nlink = le32_to_cpu (sd->sd_nlink);
+	inode->i_uid = le32_to_cpu (sd->sd_uid);
+	inode->i_size = le64_to_cpu (sd->sd_size);
+	inode->i_gid = le32_to_cpu (sd->sd_gid);
+	inode->i_mtime = le32_to_cpu (sd->sd_mtime);
+	inode->i_atime = le32_to_cpu (sd->sd_atime);
+	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
+	inode->i_blocks = le32_to_cpu (sd->sd_blocks);
+	rdev = le32_to_cpu (sd->u.sd_rdev);
+    }
+
+    /* nopack = 0, by default */
+    inode->u.reiserfs_i.nopack = 0;
+
+    pathrelse (path);
+    if (S_ISREG (inode->i_mode)) {
+	inode->i_op = &reiserfs_file_inode_operations;
+	inode->i_fop = &reiserfs_file_operations;
+	inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
+    } else if (S_ISDIR (inode->i_mode)) {
+	inode->i_op = 
&reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + } else if (S_ISLNK (inode->i_mode)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + } else { + inode->i_blocks = 0; + init_special_inode(inode, inode->i_mode, rdev) ; + } +} + + +// update new stat data with inode fields +static void inode2sd (void * sd, struct inode * inode) +{ + struct stat_data * sd_v2 = (struct stat_data *)sd; + + sd_v2->sd_mode = cpu_to_le16 (inode->i_mode); + sd_v2->sd_nlink = cpu_to_le16 (inode->i_nlink); + sd_v2->sd_uid = cpu_to_le32 (inode->i_uid); + sd_v2->sd_size = cpu_to_le64 (inode->i_size); + sd_v2->sd_gid = cpu_to_le32 (inode->i_gid); + sd_v2->sd_mtime = cpu_to_le32 (inode->i_mtime); + sd_v2->sd_atime = cpu_to_le32 (inode->i_atime); + sd_v2->sd_ctime = cpu_to_le32 (inode->i_ctime); + sd_v2->sd_blocks = cpu_to_le32 (inode->i_blocks); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + sd_v2->u.sd_rdev = cpu_to_le32 (inode->i_rdev); +} + + +// used to copy inode's fields to old stat data +static void inode2sd_v1 (void * sd, struct inode * inode) +{ + struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; + + sd_v1->sd_mode = cpu_to_le16 (inode->i_mode); + sd_v1->sd_uid = cpu_to_le16 (inode->i_uid); + sd_v1->sd_gid = cpu_to_le16 (inode->i_gid); + sd_v1->sd_nlink = cpu_to_le16 (inode->i_nlink); + sd_v1->sd_size = cpu_to_le32 (inode->i_size); + sd_v1->sd_atime = cpu_to_le32 (inode->i_atime); + sd_v1->sd_ctime = cpu_to_le32 (inode->i_ctime); + sd_v1->sd_mtime = cpu_to_le32 (inode->i_mtime); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + sd_v1->u.sd_rdev = cpu_to_le32 (inode->i_rdev); + else + sd_v1->u.sd_blocks = cpu_to_le32 (inode->i_blocks); + + // Sigh. 
i_first_direct_byte is back + sd_v1->sd_first_direct_byte = cpu_to_le32 (inode->u.reiserfs_i.i_first_direct_byte); +} + + +/* NOTE, you must prepare the buffer head before sending it here, +** and then log it after the call +*/ +static void update_stat_data (struct path * path, struct inode * inode) +{ + struct buffer_head * bh; + struct item_head * ih; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + + if (!is_statdata_le_ih (ih)) + reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h", + INODE_PKEY (inode), ih); + + if (stat_data_v1 (ih)) { + // path points to old stat data + inode2sd_v1 (B_I_PITEM (bh, ih), inode); + } else { + inode2sd (B_I_PITEM (bh, ih), inode); + } + + return; +} + + +void reiserfs_update_sd (struct reiserfs_transaction_handle *th, + struct inode * inode) +{ + struct cpu_key key; + INITIALIZE_PATH(path); + struct buffer_head *bh ; + int fs_gen ; + struct item_head *ih, tmp_ih ; + int retval; + + make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant + + for(;;) { + /* look for the object's stat data */ + retval = search_item (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13050: reiserfs_update_sd: " + "i/o failure occurred trying to update %K stat data", + &key); + return; + } + if (retval == ITEM_NOT_FOUND) { + pathrelse(&path) ; + if (inode->i_nlink == 0) { + /*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/ + return; + } + reiserfs_warning ("vs-13060: reiserfs_update_sd: " + "stat data of object %k (nlink == %d) not found (pos %d)\n", + INODE_PKEY (inode), inode->i_nlink); + reiserfs_check_path(&path) ; + return; + } + + /* sigh, prepare_for_journal might schedule. When it schedules the + ** FS might change. 
We have to detect that, and loop back to the + ** search if the stat data item has moved + */ + bh = get_bh(&path) ; + ih = get_ih(&path) ; + copy_item_head (&tmp_ih, ih); + fs_gen = get_generation (inode->i_sb); + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + continue ; /* Stat_data item has been moved after scheduling. */ + } + break; + } + update_stat_data (&path, inode); + journal_mark_dirty(th, th->t_super, bh) ; + pathrelse (&path); + return; +} + +void reiserfs_read_inode(struct inode *inode) { + make_bad_inode(inode) ; +} + + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// + +/* looks for stat data in the tree, and fills up the fields of in-core + inode stat data fields */ +void reiserfs_read_inode2 (struct inode * inode, void *p) +{ + INITIALIZE_PATH (path_to_sd); + struct cpu_key key; + struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ; + unsigned long dirino; + int retval; + + if (!p) { + make_bad_inode(inode) ; + return; + } + + dirino = args->objectid ; + + /* set version 1, version 2 could be used too, because stat data + key is the same in both versions */ + key.version = ITEM_VERSION_1; + key.on_disk_key.k_dir_id = dirino; + key.on_disk_key.k_objectid = inode->i_ino; + key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET; + key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS; + + /* look for the object's stat data */ + retval = search_item (inode->i_sb, &key, &path_to_sd); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13070: reiserfs_read_inode2: " + "i/o failure occurred trying to find stat data of %K\n", + &key); + make_bad_inode(inode) ; + return; + } + if (retval != ITEM_FOUND) { + reiserfs_warning ("vs-13042: reiserfs_read_inode2: %K not found\n", &key); + pathrelse (&path_to_sd); + make_bad_inode(inode) ; + return; + } + 
+ init_inode (inode, &path_to_sd); + reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ + +} + + +struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key) +{ + struct inode * inode; + struct reiserfs_iget4_args args ; + + args.objectid = key->on_disk_key.k_dir_id ; + inode = iget4 (s, key->on_disk_key.k_objectid, 0, (void *)(&args)); + if (!inode) + return inode ; + + // if (comp_short_keys (INODE_PKEY (inode), key)) { + if (is_bad_inode (inode)) { + reiserfs_warning ("vs-13048: reiserfs_iget: " + "bad_inode. Stat data of (%lu %lu) not found\n", + key->on_disk_key.k_dir_id, key->on_disk_key.k_objectid); + iput (inode); + inode = 0; + } + return inode; +} + + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// +/* looks for stat data, then copies fields to it, marks the buffer + containing stat data as dirty */ +void reiserfs_write_inode (struct inode * inode, int do_sync) { + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + lock_kernel() ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + windex = push_journal_writer("write_inode") ; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + if (do_sync) + journal_end_sync(&th, inode->i_sb, jbegin_count) ; + else + journal_end(&th, inode->i_sb, jbegin_count) ; + unlock_kernel() ; +} + + +/* FIXME: no need any more. right? */ +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode) +{ + int err = 0; + + reiserfs_update_sd (th, inode); + return err; +} + + +/* stat data of new object is inserted already, this inserts the item + containing "." and ".." 
entries */ +static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, + struct item_head * ih, struct path * path, const struct inode * dir) +{ + struct super_block * sb = th->t_super; + char empty_dir [EMPTY_DIR_SIZE]; + char * body = empty_dir; + struct cpu_key key; + int retval; + + _make_cpu_key (&key, ITEM_VERSION_1, le32_to_cpu (ih->ih_key.k_dir_id), + le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/); + + /* compose item head for new item. Directories consist of items of + old type (ITEM_VERSION_1). Do not set key (second arg is 0), it + is done by reiserfs_new_inode */ + if (old_format_only (sb)) { + make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); + + make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, + le32_to_cpu (INODE_PKEY (dir)->k_dir_id), + le32_to_cpu (INODE_PKEY (dir)->k_objectid)); + } else { + make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); + + make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, + le32_to_cpu (INODE_PKEY (dir)->k_dir_id), + le32_to_cpu (INODE_PKEY (dir)->k_objectid)); + } + + /* look for place in the tree for new item */ + retval = search_item (sb, &key, path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13080: reiserfs_new_directory: " + "i/o failure occured creating new directory\n"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse (path); + reiserfs_warning ("vs-13070: reiserfs_new_directory: " + "object with this key exists (%k)", &(ih->ih_key)); + return -EEXIST; + } + + /* insert item, that is empty directory item */ + return reiserfs_insert_item (th, path, &key, ih, body); +} + + +/* stat data of object has been inserted, this inserts the item + containing the body of symlink */ +static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, + struct item_head * ih, + struct path * path, const char * symname, int 
item_len) +{ + struct super_block * sb = th->t_super; + struct cpu_key key; + int retval; + + _make_cpu_key (&key, ITEM_VERSION_1, + le32_to_cpu (ih->ih_key.k_dir_id), + le32_to_cpu (ih->ih_key.k_objectid), + 1, TYPE_DIRECT, 3/*key length*/); + + make_le_item_head (ih, 0, ITEM_VERSION_1, 1, TYPE_DIRECT, item_len, 0/*free_space*/); + + /* look for place in the tree for new item */ + retval = search_item (sb, &key, path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13080: reiserfs_new_symlinik: " + "i/o failure occured creating new symlink\n"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse (path); + reiserfs_warning ("vs-13080: reiserfs_new_symlink: " + "object with this key exists (%k)", &(ih->ih_key)); + return -EEXIST; + } + + /* insert item, that is body of symlink */ + return reiserfs_insert_item (th, path, &key, ih, symname); +} + + +/* inserts the stat data into the tree, and then calls + reiserfs_new_directory (to insert ".", ".." item if new object is + directory) or reiserfs_new_symlink (to insert symlink body if new + object is symlink) or nothing (if new object is regular file) */ +struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, + const struct inode * dir, int mode, + const char * symname, + int i_size, /* 0 for regular, EMTRY_DIR_SIZE for dirs, + strlen (symname) for symlinks)*/ + struct dentry *dentry, struct inode *inode, int * err) +{ + struct super_block * sb; + INITIALIZE_PATH (path_to_key); + struct cpu_key key; + struct item_head ih; + struct stat_data sd; + int retval; + + if (!dir || !dir->i_nlink) { + *err = -EPERM; + iput(inode) ; + return NULL; + } + + sb = dir->i_sb; + inode->i_sb = sb; + inode->i_flags = 0;//inode->i_sb->s_flags; + + /* item head of new item */ + ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid; + ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); + if (!ih.ih_key.k_objectid) { + iput(inode) ; + *err = -ENOMEM; + return NULL; + } + if (old_format_only 
(sb)) + make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); + else + make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); + + + /* key to search for correct place for new stat data */ + _make_cpu_key (&key, ITEM_VERSION_2, le32_to_cpu (ih.ih_key.k_dir_id), + le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/); + + /* find proper place for inserting of stat data */ + retval = search_item (sb, &key, &path_to_key); + if (retval == IO_ERROR) { + iput (inode); + *err = -EIO; + return NULL; + } + if (retval == ITEM_FOUND) { + pathrelse (&path_to_key); + iput (inode); + *err = -EEXIST; + return NULL; + } + + /* fill stat data */ + inode->i_mode = mode; + inode->i_nlink = (S_ISDIR (mode) ? 2 : 1); + inode->i_uid = current->fsuid; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_size = i_size; + inode->i_blocks = (inode->i_size + 511) >> 9; + inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 
1 : + U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; + + if (old_format_only (sb)) + inode2sd_v1 (&sd, inode); + else + inode2sd (&sd, inode); + + // these do not go to on-disk stat data + inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); + inode->i_blksize = PAGE_SIZE; + inode->i_dev = sb->s_dev; + + // store in in-core inode the key of stat data and version all + // object items will have (directory items will have old offset + // format, other new objects will consist of new items) + memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE); + if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode)) + inode_items_version (inode) = ITEM_VERSION_1; + else + inode_items_version (inode) = ITEM_VERSION_2; + + /* insert the stat data into the tree */ + retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); + if (retval) { + iput (inode); + *err = retval; + reiserfs_check_path(&path_to_key) ; + return NULL; + } + + if (S_ISDIR(mode)) { + /* insert item with "." and ".." */ + retval = reiserfs_new_directory (th, &ih, &path_to_key, dir); + } + + if (S_ISLNK(mode)) { + /* insert body of symlink */ + if (!old_format_only (sb)) + i_size = ROUND_UP(i_size); + retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size); + } + if (retval) { + inode->i_nlink = 0; + iput (inode); + *err = retval; + reiserfs_check_path(&path_to_key) ; + return NULL; + } + + /* not a perfect generation count, as object ids can be reused, but this + ** is as good as reiserfs can do right now + */ + inode->i_generation = INODE_PKEY (inode)->k_dir_id; + insert_inode_hash (inode); + // we do not mark inode dirty: on disk content matches to the + // in-core one + reiserfs_check_path(&path_to_key) ; + return inode; +} + +/* +** finds the tail page in the page cache, +** reads the last block in. +** +** On success, page_result is set to a locked, pinned page, and bh_result +** is set to an up to date buffer for the last block in the file. returns 0. 
+** +** tail conversion is not done, so bh_result might not be valid for writing +** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before +** trying to write the block. +** +** on failure, nonzero is returned, page_result and bh_result are untouched. +*/ +static int grab_tail_page(struct inode *p_s_inode, + struct page **page_result, + struct buffer_head **bh_result) { + + /* we want the page with the last byte in the file, + ** not the page that will hold the next byte for appending + */ + unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ; + unsigned long pos = 0 ; + unsigned long start = 0 ; + unsigned long blocksize = p_s_inode->i_sb->s_blocksize ; + unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ; + struct buffer_head *bh ; + struct buffer_head *head ; + struct page * page ; + int error ; + + /* we know that we are only called with inode->i_size > 0. + ** we also know that a file tail can never be as big as a block + ** If i_size % blocksize == 0, our file is currently block aligned + ** and it won't need converting or zeroing after a truncate. + */ + if ((offset & (blocksize - 1)) == 0) { + return -ENOENT ; + } + page = grab_cache_page(p_s_inode->i_mapping, index) ; + error = PTR_ERR(page) ; + if (IS_ERR(page)) { + goto out ; + } + /* start within the page of the last block in the file */ + start = (offset / blocksize) * blocksize ; + + error = block_prepare_write(page, start, offset, + reiserfs_get_block_create_0) ; + kunmap(page) ; /* mapped by block_prepare_write */ + if (error) + goto unlock ; + + head = page->buffers ; + bh = head; + do { + if (pos >= start) { + break ; + } + bh = bh->b_this_page ; + pos += blocksize ; + } while(bh != head) ; + + if (!buffer_uptodate(bh)) { + /* note, this should never happen, prepare_write should + ** be taking care of this for us. 
If the buffer isn't up to date, + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ + reiserfs_warning("clm-6000: error reading block %lu on dev %s\n", + bh->b_blocknr, kdevname(bh->b_dev)) ; + error = -EIO ; + goto unlock ; + } + *bh_result = bh ; + *page_result = page ; + +out: + return error ; + +unlock: + UnlockPage(page) ; + page_cache_release(page) ; + return error ; +} + +/* +** vfs version of truncate file. Must NOT be called with +** a transaction already started. +** +** some code taken from block_truncate_page +*/ +void reiserfs_truncate_file(struct inode *p_s_inode) { + struct reiserfs_transaction_handle th ; + int windex ; + + /* we want the offset for the first byte after the end of the file */ + unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ; + unsigned blocksize = p_s_inode->i_sb->s_blocksize ; + unsigned length ; + struct page *page = NULL ; + int error ; + struct buffer_head *bh = NULL ; + + if (p_s_inode->i_size > 0) { + if ((error = grab_tail_page(p_s_inode, &page, &bh))) { + // -ENOENT means we truncated past the end of the file, + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) + reiserfs_warning("clm-6001: grab_tail_page failed %d\n", error); + page = NULL ; + bh = NULL ; + } + } + + /* so, if page != NULL, we have a buffer head for the offset at + ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, + ** then we have an unformatted node. Otherwise, we have a direct item, + ** and no zeroing is required. We zero after the truncate, because the + ** truncate might pack the item anyway (it will unmap bh if it packs). 
+ */ + prevent_flush_page_lock(page, p_s_inode) ; + journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + windex = push_journal_writer("resierfs_vfs_truncate_file") ; + reiserfs_do_truncate (&th, p_s_inode, page, 1/*update timestamps*/) ; + pop_journal_writer(windex) ; + journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + allow_flush_page_lock(page, p_s_inode) ; + + if (page && buffer_mapped(bh) && bh->b_blocknr != 0) { + length = offset & (blocksize - 1) ; + /* if we are not on a block boundary */ + if (length) { + length = blocksize - length ; + memset((char *)kmap(page) + offset, 0, length) ; + flush_dcache_page(page) ; + kunmap(page) ; + mark_buffer_dirty(bh) ; + } + } + + if (page) { + UnlockPage(page) ; + page_cache_release(page) ; + } + return ; +} + +static int map_and_dirty_block(struct inode *inode, + struct buffer_head *bh_result, + unsigned long block) { + struct reiserfs_transaction_handle th ; + int fs_gen ; + struct item_head tmp_ih ; + struct item_head *ih ; + struct buffer_head *bh ; + __u32 *item ; + struct cpu_key key ; + INITIALIZE_PATH(path) ; + int pos_in_item ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT ; + loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; + int retval ; + int use_get_block = 0 ; + int bytes_copied = 0 ; + int copy_size ; + +start_over: + lock_kernel() ; + prevent_flush_page_lock(bh_result->b_page, inode) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + + make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; + +research: + retval = search_for_position_by_key(inode->i_sb, &key, &path) ; + if (retval != POSITION_FOUND) { + retval = -EIO ; + goto out ; + } + + bh = get_bh(&path) ; + ih = get_ih(&path) ; + item = get_item(&path) ; + pos_in_item = path.pos_in_item ; + fs_gen = get_generation(inode->i_sb) ; + copy_item_head(&tmp_ih, ih) ; + + /* we've found an unformatted node */ + if (indirect_item_found(retval, ih)) { + if (bytes_copied > 0) { + reiserfs_warning("clm-6002: 
bytes_copied %d\n", bytes_copied) ; + } + if (!item[pos_in_item]) { + /* crap, we are writing to a hole */ + use_get_block = 1; + goto out ; + } + set_block_dev_mapped(bh_result, le32_to_cpu(item[pos_in_item]), inode); +// printk("found indirect block offset %lu\n", block) ; + mark_buffer_dirty(bh_result) ; + } else if (is_direct_le_ih(ih)) { + copy_size = le16_to_cpu(ih->ih_item_len) - pos_in_item ; + memcpy( B_I_PITEM(bh, ih) + pos_in_item, + bh_result->b_data + bytes_copied, copy_size) ; + + journal_mark_dirty(&th, inode->i_sb, bh) ; + bytes_copied += copy_size ; + + /* are there still bytes left? */ + if (bytes_copied < bh_result->b_size && + (byte_offset + bytes_copied) < inode->i_size) { + set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ; + goto research ; + } + } else { + reiserfs_warning("clm-6003: bad item inode %lu, device %s\n", inode->i_ino, kdevname(inode->i_sb->s_dev)) ; + retval = -EIO ; + goto out ; + } + retval = 0 ; + +out: + pathrelse(&path) ; + journal_end(&th, inode->i_sb, jbegin_count) ; + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + + if (use_get_block) { +// printk("plugging hole, block offset %lu\n", block) ; + retval = reiserfs_get_block(inode, block, bh_result, 1) ; + if (!retval) { + if (buffer_mapped(bh_result) && bh_result->b_blocknr != 0) { + mark_buffer_dirty(bh_result) ; + } else { + /* get_block failed to find a mapped formatted node. 
*/ + use_get_block = 0 ; + goto start_over ; + } + } + } + return retval ; +} + +static int reiserfs_write_full_page(struct page *page) { + struct inode *inode = (struct inode *)page->mapping->host ; + unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; + unsigned last_offset = PAGE_CACHE_SIZE; + int error = 0; + unsigned long block ; + unsigned cur_offset = 0 ; + struct buffer_head *head, *bh ; + int partial = 0 ; + + if (!page->buffers) { + block_prepare_write(page, 0, 0, NULL) ; + kunmap(page) ; + } + /* last page in the file */ + if (page->index >= end_index) { + last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; + /* no file contents in this page */ + if (page->index >= end_index + 1 || !last_offset) { + return -EIO ; + } + memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ; + flush_dcache_page(page) ; + kunmap(page) ; + } + head = page->buffers ; + bh = head ; + block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; + do { + /* if this offset in the page is outside the file */ + if (cur_offset >= last_offset) { + if (!buffer_uptodate(bh)) + partial = 1 ; + } else { + /* this end_io handler is exactly the same as end_buffer_io_sync */ + bh->b_end_io = reiserfs_journal_end_io ; + + /* buffer mapped to an unformatted node */ + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + mark_buffer_dirty(bh) ; + } else { + /* buffer not mapped yet, or points to a direct item. 
+ ** search and dirty or log + */ + if ((error = map_and_dirty_block(inode, bh, block))) { + goto fail ; + } + } + } + bh = bh->b_this_page ; + cur_offset += bh->b_size ; + block++ ; + } while(bh != head) ; + + if (!partial) + SetPageUptodate(page) ; + + return 0 ; + +fail: + ClearPageUptodate(page) ; + return error ; +} + +// +// this is exactly what 2.3.99-pre9's ext2_readpage is +// +static int reiserfs_readpage (struct file *f, struct page * page) +{ + return block_read_full_page (page, reiserfs_get_block); +} + + +// +// modified from ext2_writepage is +// +static int reiserfs_writepage (struct file *f, struct page * page) +{ + return reiserfs_write_full_page(page) ; +} + + +// +// from ext2_prepare_write, but modified +// +static int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to) { + fix_tail_page_for_writing(page) ; + return block_prepare_write(page, from, to, reiserfs_get_block) ; +} + + +// +// this is exactly what 2.3.99-pre9's ext2_bmap is +// +static int reiserfs_aop_bmap(struct address_space *as, long block) { + return generic_block_bmap(as, block, reiserfs_bmap) ; +} + + +static int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to) { + struct inode *inode = (struct inode *)(page->mapping->host) ; + int ret ; + + prevent_flush_page_lock(page, inode) ; + ret = generic_commit_write(f, page, from, to) ; + allow_flush_page_lock(page, inode) ; + return ret ; +} + +struct address_space_operations reiserfs_address_space_operations = { + writepage: reiserfs_writepage, + readpage: reiserfs_readpage, + sync_page: block_sync_page, + prepare_write: reiserfs_prepare_write, + commit_write: reiserfs_commit_write, + bmap: reiserfs_aop_bmap +} ; diff -u -r --new-file linux/fs/reiserfs/ioctl.c v2.4.0-test8/linux/fs/reiserfs/ioctl.c --- linux/fs/reiserfs/ioctl.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/ioctl.c Sun Aug 20 02:47:22 2000 @@ -0,0 +1,127 @@ +/* + * Copyright 2000 
Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <linux/fs.h> +#include <linux/reiserfs_fs.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + + +/* +** reiserfs_ioctl - handler for ioctl for inode +** supported commands: +** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect +** and prevent packing file (argument arg has to be non-zero) +** 2) That's all for a while ... +*/ +int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case REISERFS_IOC_UNPACK: + if (arg) + return reiserfs_unpack (inode, filp); + + default: + return -ENOTTY; + } +} + + +/* +** reiserfs_unpack +** Function try to convert tail from direct item into indirect. +** It set up nopack attribute in the inode.u.reiserfs_i.nopack +*/ +int reiserfs_unpack (struct inode * inode, struct file * filp) +{ + int retval, windex, exitcode; + INITIALIZE_PATH(path); + struct cpu_key key; + struct buffer_head * bh, * unbh = 0; + struct item_head * ih; + loff_t tail_offs; + unsigned int t1; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 ; + struct reiserfs_transaction_handle th ; + + lock_kernel(); + + /* check is inode has 'new' statdata */ + if (inode_items_version (inode) == ITEM_VERSION_1) + return -EINVAL; + + journal_begin(&th, inode->i_sb, jbegin_count); + windex = push_journal_writer("reiserfs_ioctl_unpack"); + exitcode = 0; + +search_again: + /* find file's tail */ + t1 = ~(inode->i_sb->s_blocksize - 1); + make_cpu_key (&key, inode, (inode->i_size & t1) + 1, TYPE_DIRECT, 3); + retval = search_for_position_by_key (inode->i_sb, &key, &path); + + if (retval == POSITION_NOT_FOUND) { + if (unbh != NULL) { + unsigned long tmp = unbh->b_blocknr; + bforget (unbh); + /* free what has been allocated by get_new_buffer */ + reiserfs_free_block (&th, tmp); + } + } else { + ih = get_ih 
(&path); + /* if last item is direct then we have to convert it to indirect */ + if (is_direct_le_ih (ih)) { + bh = get_bh (&path); + /* allocate new unformatted node to place tail */ + if (!unbh) { + retval = get_new_buffer (&th, bh, &unbh, &path); + if (!unbh) { + /* can't allocate block */ + pathrelse (&path); + exitcode = -ENOSPC; + goto finish; + } + if (retval & SCHEDULE_OCCURRED) { + /* get_new_buffer was blocked and path had ability to change */ + pathrelse (&path); + restart_transaction(&th, inode, &path); + goto search_again; + } + } + tail_offs = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; + mark_buffer_uptodate (unbh, 1); + /* try to convert direct item into indirect */ + retval = direct2indirect (&th, inode, &path, unbh, tail_offs); + if (retval) { + /* direct2indirect() did not convert item */ + unsigned long tmp = unbh->b_blocknr; + bforget (unbh); + /* free what has been allocated by get_new_buffer */ + reiserfs_free_block (&th, tmp); + inode->u.reiserfs_i.nopack = 0; + exitcode = -ENOSPC; /* FIXME: what error has to be returned here? 
-az */ + goto finish; + } + brelse (unbh); + } + pathrelse (&path); + } + /* don't pack tail anymore */ + inode->u.reiserfs_i.nopack = 1; +finish: + pop_journal_writer(windex); + journal_end(&th, inode->i_sb, jbegin_count); + unlock_kernel(); + return exitcode; +} diff -u -r --new-file linux/fs/reiserfs/item_ops.c v2.4.0-test8/linux/fs/reiserfs/item_ops.c --- linux/fs/reiserfs/item_ops.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/item_ops.c Sat Aug 12 01:46:36 2000 @@ -0,0 +1,715 @@ +/* + * Copyright 2000 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + + +// this conatins item hadlers for old item types: sd, direct, +// indirect, directory + + +////////////////////////////////////////////////////////////////////////////// +// stat data functions +// +static int sd_bytes_number (struct item_head * ih, int block_size) +{ + return 0; +} + +static void sd_decrement_key (struct cpu_key * key) +{ + key->on_disk_key.k_objectid --; +} + +static int sd_is_left_mergeable (struct key * key, unsigned long bsize) +{ + return 0; +} + + + +static char * print_time (time_t t) +{ + static char timebuf[256]; + +#ifndef __KERNEL__ +// struct tm *loctime; +// loctime = localtime (&t); + sprintf (timebuf, "%s", asctime (localtime (&t))); + timebuf[strlen (timebuf) - 1] = 0; +#else + sprintf (timebuf, "%ld", t); +#endif + return timebuf; +} + + +static void sd_print_item (struct item_head * ih, char * item) +{ + printk ("\tmode | size | nlinks | first direct | mtime\n"); + if (stat_data_v1 (ih)) { + struct stat_data_v1 * sd = (struct stat_data_v1 *)item; + + printk ("\t0%-6o | %6u | %2u | %d | %s\n", sd->sd_mode, sd->sd_size, + sd->sd_nlink, sd->sd_first_direct_byte, print_time (sd->sd_mtime)); + } else { + struct stat_data * sd = (struct stat_data *)item; + + printk ("\t0%-6o | %6Lu | %2u | %d | %s\n", sd->sd_mode, 
sd->sd_size, + sd->sd_nlink, sd->u.sd_rdev, print_time (sd->sd_mtime)); + } +} + +static void sd_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! +} + + +static int sd_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_STAT_DATA; + //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? + return 0; +} + + +static int sd_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + if (start_skip || end_skip) + BUG (); + return -1; +} + + +static int sd_check_right (struct virtual_item * vi, int free) +{ + return -1; +} + +static int sd_part_size (struct virtual_item * vi, int first, int count) +{ + if (count) + BUG (); + return 0; +} + +static int sd_unit_num (struct virtual_item * vi) +{ + return vi->vi_item_len - IH_SIZE; +} + + +static void sd_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("STATDATA, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations stat_data_ops = { + sd_bytes_number, + sd_decrement_key, + sd_is_left_mergeable, + sd_print_item, + sd_check_item, + + sd_create_vi, + sd_check_left, + sd_check_right, + sd_part_size, + sd_unit_num, + sd_print_vi +}; + + + +////////////////////////////////////////////////////////////////////////////// +// direct item functions +// +static int direct_bytes_number (struct item_head * ih, int block_size) +{ + return le16_to_cpu (ih->ih_item_len); +} + + +// FIXME: this should probably switch to indirect as well +static void direct_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +static int direct_is_left_mergeable (struct key * key, unsigned long bsize) +{ + int version = le_key_version (key); + return ((le_key_k_offset (version, key) & (bsize - 1)) != 1); +} + + +static void direct_print_item (struct item_head * ih, char * 
item) +{ + int j = 0; + +// return; + printk ("\""); + while (j < ih->ih_item_len) + printk ("%c", item[j++]); + printk ("\"\n"); +} + + +static void direct_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! +} + + +static int direct_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_DIRECT; + //vi->vi_type |= VI_TYPE_DIRECT; + return 0; +} + +static int direct_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int bytes; + + bytes = free - free % 8; + return bytes ?: -1; +} + + +static int direct_check_right (struct virtual_item * vi, int free) +{ + return direct_check_left (vi, free, 0, 0); +} + +static int direct_part_size (struct virtual_item * vi, int first, int count) +{ + return count; +} + + +static int direct_unit_num (struct virtual_item * vi) +{ + return vi->vi_item_len - IH_SIZE; +} + + +static void direct_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("DIRECT, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations direct_ops = { + direct_bytes_number, + direct_decrement_key, + direct_is_left_mergeable, + direct_print_item, + direct_check_item, + + direct_create_vi, + direct_check_left, + direct_check_right, + direct_part_size, + direct_unit_num, + direct_print_vi +}; + + + +////////////////////////////////////////////////////////////////////////////// +// indirect item functions +// + +static int indirect_bytes_number (struct item_head * ih, int block_size) +{ + return le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); +} + + +// decrease offset, if it becomes 0, change type to stat data +static void indirect_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +// if it is not first item of the body, then it is mergeable 
+static int indirect_is_left_mergeable (struct key * key, unsigned long bsize) +{ + int version = le_key_version (key); + return (le_key_k_offset (version, key) != 1); +} + + +// printing of indirect item +static void start_new_sequence (__u32 * start, int * len, __u32 new) +{ + *start = new; + *len = 1; +} + + +static int sequence_finished (__u32 start, int * len, __u32 new) +{ + if (start == INT_MAX) + return 1; + + if (start == 0 && new == 0) { + (*len) ++; + return 0; + } + if (start != 0 && (start + *len) == new) { + (*len) ++; + return 0; + } + return 1; +} + +static void print_sequence (__u32 start, int len) +{ + if (start == INT_MAX) + return; + + if (len == 1) + printk (" %d", start); + else + printk (" %d(%d)", start, len); +} + + +static void indirect_print_item (struct item_head * ih, char * item) +{ + int j; + __u32 * unp, prev = INT_MAX; + int num; + + unp = (__u32 *)item; + + if (ih->ih_item_len % UNFM_P_SIZE) + printk ("indirect_print_item: invalid item len"); + + printk ("%d pointers\n[ ", I_UNFM_NUM (ih)); + for (j = 0; j < I_UNFM_NUM (ih); j ++) { + if (sequence_finished (prev, &num, unp[j])) { + print_sequence (prev, num); + start_new_sequence (&prev, &num, unp[j]); + } + } + print_sequence (prev, num); + printk ("]\n"); +} + +static void indirect_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! +} + + +static int indirect_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_INDIRECT; + //vi->vi_type |= VI_TYPE_INDIRECT; + return 0; +} + +static int indirect_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int bytes; + + bytes = free - free % UNFM_P_SIZE; + return bytes ?: -1; +} + + +static int indirect_check_right (struct virtual_item * vi, int free) +{ + return indirect_check_left (vi, free, 0, 0); +} + + + +// return size in bytes of 'units' units. 
If first == 0 - calculate from the head, othewise - form tail +static int indirect_part_size (struct virtual_item * vi, int first, int units) +{ + // unit of indirect item is byte (yet) + return units; +} + +static int indirect_unit_num (struct virtual_item * vi) +{ + // unit of indirect item is byte (yet) + return vi->vi_item_len - IH_SIZE; +} + +static void indirect_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("INDIRECT, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations indirect_ops = { + indirect_bytes_number, + indirect_decrement_key, + indirect_is_left_mergeable, + indirect_print_item, + indirect_check_item, + + indirect_create_vi, + indirect_check_left, + indirect_check_right, + indirect_part_size, + indirect_unit_num, + indirect_print_vi +}; + + +////////////////////////////////////////////////////////////////////////////// +// direntry functions +// + + +static int direntry_bytes_number (struct item_head * ih, int block_size) +{ + reiserfs_warning ("vs-16090: direntry_bytes_number: " + "bytes number is asked for direntry"); + return 0; +} + +static void direntry_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +static int direntry_is_left_mergeable (struct key * key, unsigned long bsize) +{ + if (le32_to_cpu (key->u.k_offset_v1.k_offset) == DOT_OFFSET) + return 0; + return 1; + +} + + +static void direntry_print_item (struct item_head * ih, char * item) +{ + int i; + int namelen; + struct reiserfs_de_head * deh; + char * name; + static char namebuf [80]; + + + printk ("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", "Key of pointed object", "Hash", "Gen number", "Status"); + + deh = (struct reiserfs_de_head *)item; + + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + namelen = (i ? 
((deh - 1)->deh_location) : ih->ih_item_len) - deh->deh_location; + name = item + deh->deh_location; + if (name[namelen-1] == 0) + namelen = strlen (name); + namebuf[0] = '"'; + if (namelen > sizeof (namebuf) - 3) { + strncpy (namebuf + 1, name, sizeof (namebuf) - 3); + namebuf[sizeof (namebuf) - 2] = '"'; + namebuf[sizeof (namebuf) - 1] = 0; + } else { + memcpy (namebuf + 1, name, namelen); + namebuf[namelen + 1] = '"'; + namebuf[namelen + 2] = 0; + } + + printk ("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", + i, namebuf, + deh->deh_dir_id, deh->deh_objectid, + GET_HASH_VALUE (deh_offset (deh)), GET_GENERATION_NUMBER ((deh_offset (deh))), + (de_hidden (deh)) ? "HIDDEN" : "VISIBLE"); + } +} + + +static void direntry_check_item (struct item_head * ih, char * item) +{ + int i; + struct reiserfs_de_head * deh; + + // FIXME: type something here! + deh = (struct reiserfs_de_head *)item; + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + ; + } +} + + + +#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 + +struct direntry_uarea { + int flags; + short entry_count; + short entry_sizes[1]; +}; + + +/* + * function returns old entry number in directory item in real node + * using new entry number in virtual item in virtual node */ +static inline int old_entry_num (int is_affected, int virtual_entry_num, int pos_in_item, int mode) +{ + if ( mode == M_INSERT || mode == M_DELETE) + return virtual_entry_num; + + if (!is_affected) + /* cut or paste is applied to another item */ + return virtual_entry_num; + + if (virtual_entry_num < pos_in_item) + return virtual_entry_num; + + if (mode == M_CUT) + return virtual_entry_num + 1; + +#ifdef CONFIG_REISERFS_CHECK + if (mode != M_PASTE || virtual_entry_num == 0) + reiserfs_panic (0, "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", mode); +#endif + + return virtual_entry_num - 1; +} + + + + +/* Create an array of sizes of directory entries for virtual + item. Return space used by an item. 
FIXME: no control over + consuming of space used by this item handler */ +static int direntry_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + struct direntry_uarea * dir_u = vi->vi_uarea; + int i, j; + int size = sizeof (struct direntry_uarea); + struct reiserfs_de_head * deh; + + vi->vi_index = TYPE_DIRENTRY; + + if (!(vi->vi_ih) || !vi->vi_item) + BUG (); + + + dir_u->flags = 0; + if (le_ih_k_offset (vi->vi_ih) == DOT_OFFSET) + dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; + + deh = (struct reiserfs_de_head *)(vi->vi_item); + + + /* virtual directory item have this amount of entry after */ + dir_u->entry_count = ih_entry_count (vi->vi_ih) + + ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : + (vn->vn_mode == M_PASTE ? 1 : 0)) : 0); + + for (i = 0; i < dir_u->entry_count; i ++) { + j = old_entry_num (is_affected, i, vn->vn_pos_in_item, vn->vn_mode); + dir_u->entry_sizes[i] = (j ? le16_to_cpu (deh[j - 1].deh_location) : le16_to_cpu (vi->vi_ih->ih_item_len)) - + le16_to_cpu (deh[j].deh_location) + DEH_SIZE; + } + + size += (dir_u->entry_count * sizeof (short)); + + /* set size of pasted entry */ + if (is_affected && vn->vn_mode == M_PASTE) + dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; + + +#ifdef CONFIG_REISERFS_CHECK + /* compare total size of entries with item length */ + { + int k, l; + + l = 0; + for (k = 0; k < dir_u->entry_count; k ++) + l += dir_u->entry_sizes[k]; + + if (l + IH_SIZE != vi->vi_item_len + + ((is_affected && (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT)) ? 
insert_size : 0) ) { + reiserfs_panic (0, "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", + vn->vn_mode, insert_size); + } + } +#endif + + return size; + + +} + + +// +// return number of entries which may fit into specified amount of +// free space, or -1 if free space is not enough even for 1 entry +// +static int direntry_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int i; + int entries = 0; + struct direntry_uarea * dir_u = vi->vi_uarea; + + for (i = start_skip; i < dir_u->entry_count - end_skip; i ++) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; + + free -= dir_u->entry_sizes[i]; + entries ++; + } + + if (entries == dir_u->entry_count) { + printk ("free spze %d, entry_count %d\n", free, dir_u->entry_count); + BUG (); + } + + /* "." and ".." can not be separated from each other */ + if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries < 2) + entries = 0; + + return entries ?: -1; +} + + +static int direntry_check_right (struct virtual_item * vi, int free) +{ + int i; + int entries = 0; + struct direntry_uarea * dir_u = vi->vi_uarea; + + for (i = dir_u->entry_count - 1; i >= 0; i --) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; + + free -= dir_u->entry_sizes[i]; + entries ++; + } + if (entries == dir_u->entry_count) + BUG (); + + /* "." and ".." 
can not be separated from each other */ + if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries > dir_u->entry_count - 2) + entries = dir_u->entry_count - 2; + + return entries ?: -1; +} + + +/* sum of entry sizes between from-th and to-th entries including both edges */ +static int direntry_part_size (struct virtual_item * vi, int first, int count) +{ + int i, retval; + int from, to; + struct direntry_uarea * dir_u = vi->vi_uarea; + + retval = 0; + if (first == 0) + from = 0; + else + from = dir_u->entry_count - count; + to = from + count - 1; + + for (i = from; i <= to; i ++) + retval += dir_u->entry_sizes[i]; + + return retval; +} + +static int direntry_unit_num (struct virtual_item * vi) +{ + struct direntry_uarea * dir_u = vi->vi_uarea; + + return dir_u->entry_count; +} + + + +static void direntry_print_vi (struct virtual_item * vi) +{ + int i; + struct direntry_uarea * dir_u = vi->vi_uarea; + + reiserfs_warning ("DIRENTRY, index %d, type 0x%x, %h, flags 0x%x\n", + vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); + printk ("%d entries: ", dir_u->entry_count); + for (i = 0; i < dir_u->entry_count; i ++) + printk ("%d ", dir_u->entry_sizes[i]); + printk ("\n"); +} + + +struct item_operations direntry_ops = { + direntry_bytes_number, + direntry_decrement_key, + direntry_is_left_mergeable, + direntry_print_item, + direntry_check_item, + + direntry_create_vi, + direntry_check_left, + direntry_check_right, + direntry_part_size, + direntry_unit_num, + direntry_print_vi +}; + + +////////////////////////////////////////////////////////////////////////////// +// +// +#if ! 
(TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) + do not compile +#endif + +struct item_operations * item_ops [4] = { + &stat_data_ops, + &indirect_ops, + &direct_ops, + &direntry_ops +}; + + + + diff -u -r --new-file linux/fs/reiserfs/journal.c v2.4.0-test8/linux/fs/reiserfs/journal.c --- linux/fs/reiserfs/journal.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/journal.c Mon Sep 11 05:21:49 2000 @@ -0,0 +1,3152 @@ +/* +** Write ahead logging implementation copyright Chris Mason 2000 +** +** +** The background commits make this code very interelated, and +** overly complex. I need to rethink things a bit....The major players: +** +** journal_begin -- call with the number of blocks you expect to log. +** If the current transaction is too +** old, it will block until the current transaction is +** finished, and then start a new one. +** Usually, your transaction will get joined in with +** previous ones for speed. +** +** journal_join -- same as journal_begin, but won't block on the current +** transaction regardless of age. Don't ever call +** this. Ever. There are only two places it should be +** called from, and they are both inside this file. +** +** journal_mark_dirty -- adds blocks into this transaction. clears any flags +** that might make them get sent to disk +** and then marks them BH_JDirty. Puts the buffer head +** into the current transaction hash. +** +** journal_end -- if the current transaction is batchable, it does nothing +** otherwise, it could do an async/synchronous commit, or +** a full flush of all log and real blocks in the +** transaction. +** +** flush_old_commits -- if the current transaction is too old, it is ended and +** commit blocks are sent to disk. Forces commit blocks +** to disk for all backgrounded commits that have been +** around too long. 
+** -- Note, if you call this as an immediate flush from +** from within kupdate, it will ignore the immediate flag +** +** The commit thread -- a writer process for async commits. It allows a +** a process to request a log flush on a task queue. +** the commit will happen once the commit thread wakes up. +** The benefit here is the writer (with whatever +** related locks it has) doesn't have to wait for the +** log blocks to hit disk if it doesn't want to. +*/ + +#ifdef __KERNEL__ + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <asm/semaphore.h> + +#include <linux/vmalloc.h> +#include <linux/reiserfs_fs.h> + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/locks.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + +/* the number of mounted filesystems. This is used to decide when to +** start and kill the commit thread +*/ +static int reiserfs_mounted_fs_count = 0 ; + +/* wake this up when you add something to the commit thread task queue */ +DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ; + +/* wait on this if you need to be sure you task queue entries have been run */ +static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ; + +/* task queue for async commits, and for the end_io tasks that can't +** be done with interrupts turned off +** +** tasks put on this queue will be run by the commit thread +*/ +DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ; +DECLARE_TASK_QUEUE(reiserfs_end_io_tq) ; +DECLARE_MUTEX(reiserfs_end_io_sem) ; + +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ + +/* cnode stat bits. Move these into reiserfs_fs.h */ + +#define BLOCK_FREED 2 /* this block was freed, and can't be written. 
*/ +#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ + +#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ + +/* flags for do_journal_end */ +#define FLUSH_ALL 1 /* flush commit and real blocks */ +#define COMMIT_NOW 2 /* end and commit this transaction */ +#define WAIT 4 /* wait for the log blocks to hit the disk*/ + +static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; +static void dirty_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) ; +static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int old_only,int flushall) ; +static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; + +static void init_journal_hash(struct super_block *p_s_sb) { + memset(SB_JOURNAL(p_s_sb)->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; +} + +/* +** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to +** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for +** more details. 
+*/ +static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { + if (bh) { + clear_bit(BH_Dirty, &bh->b_state) ; +#if 0 + if (bh->b_list != BUF_CLEAN) { + reiserfs_file_buffer(bh, BUF_CLEAN) ; + } +#endif + } + return 0 ; +} + +static struct reiserfs_bitmap_node * +allocate_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_bitmap_node *bn ; + static int id = 0 ; + + bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_BUFFER) ; + if (!bn) { + return NULL ; + } + bn->data = kmalloc(p_s_sb->s_blocksize, GFP_BUFFER) ; + if (!bn->data) { + kfree(bn) ; + return NULL ; + } + bn->id = id++ ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + INIT_LIST_HEAD(&bn->list) ; + return bn ; +} + +static struct reiserfs_bitmap_node * +get_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_bitmap_node *bn = NULL; + struct list_head *entry = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + + SB_JOURNAL(p_s_sb)->j_used_bitmap_nodes++ ; +repeat: + + if(entry != &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) { + bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; + list_del(entry) ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes-- ; + return bn ; + } + bn = allocate_bitmap_node(p_s_sb) ; + if (!bn) { + current->policy = SCHED_YIELD ; + schedule() ; + goto repeat ; + } + return bn ; +} +static inline void free_bitmap_node(struct super_block *p_s_sb, + struct reiserfs_bitmap_node *bn) { + SB_JOURNAL(p_s_sb)->j_used_bitmap_nodes-- ; + if (SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { + kfree(bn->data) ; + kfree(bn) ; + } else { + list_add(&bn->list, &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes++ ; + } +} + +static void allocate_bitmap_nodes(struct super_block *p_s_sb) { + int i ; + struct reiserfs_bitmap_node *bn = NULL ; + for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { + bn = allocate_bitmap_node(p_s_sb) ; + if (bn) { + list_add(&bn->list, 
&SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes++ ; + } else { + break ; // this is ok, we'll try again when more are needed + } + } +} + +static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, + struct reiserfs_list_bitmap *jb) { + int bmap_nr = block / (p_s_sb->s_blocksize << 3) ; + int bit_nr = block % (p_s_sb->s_blocksize << 3) ; + + if (!jb->bitmaps[bmap_nr]) { + jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; + } + set_bit(bit_nr, jb->bitmaps[bmap_nr]->data) ; + return 0 ; +} + +static void cleanup_bitmap_list(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb) { + int i; + for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { + if (jb->bitmaps[i]) { + free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; + jb->bitmaps[i] = NULL ; + } + } +} + +/* +** only call this on FS unmount. +*/ +static int free_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array) { + int i ; + struct reiserfs_list_bitmap *jb ; + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + cleanup_bitmap_list(p_s_sb, jb) ; + vfree(jb->bitmaps) ; + jb->bitmaps = NULL ; + } + return 0; +} + +static int free_bitmap_nodes(struct super_block *p_s_sb) { + struct list_head *next = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + struct reiserfs_bitmap_node *bn ; + + while(next != &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) { + bn = list_entry(next, struct reiserfs_bitmap_node, list) ; + list_del(next) ; + kfree(bn->data) ; + kfree(bn) ; + next = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes-- ; + } + + return 0 ; +} + +/* +** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. +** jb_array is the array to be filled in. 
+*/ +int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array, + int bmap_nr) { + int i ; + int failed = 0 ; + struct reiserfs_list_bitmap *jb ; + int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; + + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + jb->bitmaps = vmalloc( mem ) ; + if (!jb->bitmaps) { + reiserfs_warning("clm-2000, unable to allocate bitmaps for journal lists\n") ; + failed = 1; + break ; + } + memset(jb->bitmaps, 0, mem) ; + } + if (failed) { + free_list_bitmaps(p_s_sb, jb_array) ; + return -1 ; + } + return 0 ; +} + +/* +** find an available list bitmap. If you can't find one, flush a commit list +** and try again +*/ +static struct reiserfs_list_bitmap * +get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + int i,j ; + struct reiserfs_list_bitmap *jb = NULL ; + + for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { + i = SB_JOURNAL(p_s_sb)->j_list_bitmap_index ; + SB_JOURNAL(p_s_sb)->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; + jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; + if (SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list) { + flush_commit_list(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list, 1) ; + if (!SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list) { + break ; + } + } else { + break ; + } + } + if (jb->journal_list) { /* double check to make sure if flushed correctly */ + return NULL ; + } + jb->journal_list = jl ; + return jb ; +} + +/* +** allocates a new chunk of X nodes, and links them all together as a list. 
+** Uses the cnode->next and cnode->prev pointers +** returns NULL on failure +*/ +static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { + struct reiserfs_journal_cnode *head ; + int i ; + if (num_cnodes <= 0) { + return NULL ; + } + head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + if (!head) { + return NULL ; + } + memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + head[0].prev = NULL ; + head[0].next = head + 1 ; + for (i = 1 ; i < num_cnodes; i++) { + head[i].prev = head + (i - 1) ; + head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ + } + head[num_cnodes -1].next = NULL ; + return head ; +} + +/* +** pulls a cnode off the free list, or returns NULL on failure +*/ +static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { + struct reiserfs_journal_cnode *cn ; + int windex = push_journal_writer("get_cnode") ; + + reiserfs_check_lock_depth("get_cnode") ; + + if (SB_JOURNAL(p_s_sb)->j_cnode_free <= 0) { + pop_journal_writer(windex) ; + return NULL ; + } + SB_JOURNAL(p_s_sb)->j_cnode_used++ ; + SB_JOURNAL(p_s_sb)->j_cnode_free-- ; + cn = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + if (!cn) { + pop_journal_writer(windex) ; + return cn ; + } + if (cn->next) { + cn->next->prev = NULL ; + } + SB_JOURNAL(p_s_sb)->j_cnode_free_list = cn->next ; + memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; + pop_journal_writer(windex) ; + return cn ; +} + +/* +** returns a cnode to the free list +*/ +static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { + int windex = push_journal_writer("free_cnode") ; + + reiserfs_check_lock_depth("free_cnode") ; + + SB_JOURNAL(p_s_sb)->j_cnode_used-- ; + SB_JOURNAL(p_s_sb)->j_cnode_free++ ; + /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ + cn->next = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + if (SB_JOURNAL(p_s_sb)->j_cnode_free_list) { + SB_JOURNAL(p_s_sb)->j_cnode_free_list->prev = cn ; + 
} + cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ + SB_JOURNAL(p_s_sb)->j_cnode_free_list = cn ; + pop_journal_writer(windex) ; +} + +static int clear_prepared_bits(struct buffer_head *bh) { + clear_bit(BH_JPrepared, &bh->b_state) ; + clear_bit(BH_JRestore_dirty, &bh->b_state) ; + return 0 ; +} + +/* buffer is in current transaction */ +inline int buffer_journaled(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JDirty, &bh->b_state) ; + else + return 0 ; +} + +/* disk block was taken off free list before being in a finished transation, or written to disk +** journal_new blocks can be reused immediately, for any purpose +*/ +inline int buffer_journal_new(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JNew, &bh->b_state) ; + else + return 0 ; +} + +inline int mark_buffer_journal_new(struct buffer_head *bh) { + if (bh) { + set_bit(BH_JNew, &bh->b_state) ; + } + return 0 ; +} + +inline int mark_buffer_not_journaled(struct buffer_head *bh) { + if (bh) + clear_bit(BH_JDirty, &bh->b_state) ; + return 0 ; +} + +void reiserfs_check_lock_depth(char *caller) { +#ifdef __SMP__ + if (current->lock_depth < 0) { + char *crashit = NULL ; + printk("%s called without kernel lock held\n", caller) ; + show_reiserfs_locks() ; + *crashit = 1 ; + } +#else + ; +#endif +} + +/* return a cnode with same dev, block number and size in table, or null if not found */ +static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct reiserfs_journal_cnode **table, + kdev_t dev,long bl,int size) { + struct reiserfs_journal_cnode *cn ; + cn = journal_hash(table, dev, bl) ; + while(cn) { + if ((cn->blocknr == bl) && (cn->dev == dev)) + return cn ; + cn = cn->hnext ; + } + return (struct reiserfs_journal_cnode *)0 ; +} + +/* returns a cnode with same size, block number and dev as bh in the current transaction hash. 
NULL if not found */ +static inline struct reiserfs_journal_cnode *get_journal_hash(struct super_block *p_s_sb, struct buffer_head *bh) { + struct reiserfs_journal_cnode *cn ; + if (bh) { + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, bh->b_dev, bh->b_blocknr, bh->b_size) ; + } + else { + return (struct reiserfs_journal_cnode *)0 ; + } + return cn ; +} + +/* +** once upon a time, the journal would deadlock. a lot. Now, which CONFIG_REISERFS_CHECK, anytime someone enters a +** transaction, it pushes itself into this ugly static list, and pops itself off before calling journal_end. +** I made a SysRq key to dump the list, and tell me what the writers are when I'm deadlocked. +*/ +static char *journal_writers[512] ; +int push_journal_writer(char *s) { +#ifdef CONFIG_REISERFS_CHECK + int i ; + for (i = 0 ; i < 512 ; i++) { + if (!journal_writers[i]) { + journal_writers[i] = s ; + return i ; + } + } + return -1 ; +#else + return 0 ; +#endif +} +int pop_journal_writer(int index) { +#ifdef CONFIG_REISERFS_CHECK + if (index >= 0) { + journal_writers[index] = NULL ; + } +#endif + return 0 ; +} +int dump_journal_writers(void) { + int i ; + for (i = 0 ; i < 512 ; i++) { + if (journal_writers[i]) { + printk("%d: %s\n", i, journal_writers[i]) ; + } + } + return 0 ; +} + +/* +** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated +** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever +** being overwritten by a replay after crashing. +** +** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting +** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make +** sure you never write the block without logging it. +** +** next_zero_bit is a suggestion about the next block to try for find_forward. 
+** when bl is rejected because it is set in a journal list bitmap, we search +** for the next zero bit in the bitmap that rejected bl. Then, we return that +** through next_zero_bit for find_forward to try. +** +** Just because we return something in next_zero_bit does not mean we won't +** reject it on the next call to reiserfs_in_journal +** +*/ +int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, + unsigned long bl, int size, int search_all, + unsigned long *next_zero_bit) { + struct reiserfs_journal_cnode *cn ; + struct reiserfs_list_bitmap *jb ; + int i ; + int bmap_nr = bl / (p_s_sb->s_blocksize << 3) ; + int bit_nr = bl % (p_s_sb->s_blocksize << 3) ; + int tmp_bit ; + + *next_zero_bit = 0 ; /* always start this at zero. */ + + /* we aren't logging all blocks are safe for reuse */ + if (reiserfs_dont_log(p_s_sb)) { + return 0 ; + } + + /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. + ** if we crash before the transaction that freed it commits, this transaction won't + ** have committed either, and the block will never be written + */ + if (search_all) { + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; + if (jb->journal_list && jb->bitmaps[bmap_nr] && + test_bit(bit_nr, jb->bitmaps[bmap_nr]->data)) { + tmp_bit = find_next_zero_bit((unsigned long *) + (jb->bitmaps[bmap_nr]->data), + p_s_sb->s_blocksize << 3, bit_nr+1) ; + *next_zero_bit = bmap_nr * (p_s_sb->s_blocksize << 3) + tmp_bit ; + return 1 ; + } + } + } + + /* is it in any old transactions? */ + if (search_all && (cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, dev,bl,size))) { + return 1; + } + + /* is it in the current transaction. 
This should never happen */ + if ((cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, dev,bl,size))) { + return 1; + } + + /* safe for reuse */ + return 0 ; +} + +/* insert cn into table +*/ +inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { + struct reiserfs_journal_cnode *cn_orig ; + + cn_orig = journal_hash(table, cn->dev, cn->blocknr) ; + cn->hnext = cn_orig ; + cn->hprev = NULL ; + if (cn_orig) { + cn_orig->hprev = cn ; + } + journal_hash(table, cn->dev, cn->blocknr) = cn ; +} + +/* lock the current transaction */ +inline static void lock_journal(struct super_block *p_s_sb) { + int windex = push_journal_writer("lock_journal") ; + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; + } + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ; + pop_journal_writer(windex) ; +} + +/* unlock the current transaction */ +inline static void unlock_journal(struct super_block *p_s_sb) { + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wlock)) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_wait)) ; +} + +/* +** this used to be much more involved, and I'm keeping it just in case things get ugly again. +** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a +** transaction. +*/ +static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + + struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; + if (jb) { + cleanup_bitmap_list(p_s_sb, jb) ; + } + jl->j_list_bitmap->journal_list = NULL ; + jl->j_list_bitmap = NULL ; +} + +/* +** if this journal list still has commit blocks unflushed, send them to disk. 
+** +** log areas must be flushed in order (transaction 2 can't commit before transaction 1) +** Before the commit block can by written, every other log block must be safely on disk +** +*/ +static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { + int i, count ; + int index = 0 ; + int bn ; + int retry_count = 0 ; + int orig_commit_left = 0 ; + struct buffer_head *tbh = NULL ; + struct reiserfs_journal_list *other_jl ; + int windex = push_journal_writer("flush_commit_list") ; + + reiserfs_check_lock_depth("flush_commit_list") ; + + if (atomic_read(&jl->j_older_commits_done)) { + pop_journal_writer(windex) ; + return 0 ; + } + + /* before we can put our commit blocks on disk, we have to make sure everyone older than + ** us is on disk too + */ + if (jl->j_len <= 0) { + pop_journal_writer(windex) ; + return 0 ; + } + if (flushall) { + /* we _must_ make sure the transactions are committed in order. Start with the + ** index after this one, wrap all the way around + */ + index = (jl - SB_JOURNAL_LIST(s)) + 1 ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + other_jl = SB_JOURNAL_LIST(s) + ( (index + i) % JOURNAL_LIST_COUNT) ; + if (other_jl && other_jl != jl && other_jl->j_len > 0 && other_jl->j_trans_id > 0 && + other_jl->j_trans_id <= jl->j_trans_id && (atomic_read(&(jl->j_older_commits_done)) == 0)) { + flush_commit_list(s, other_jl, 0) ; + } + } + } + + count = 0 ; + /* don't flush the commit list for the current transactoin */ + if (jl == ((SB_JOURNAL_LIST(s) + SB_JOURNAL_LIST_INDEX(s)))) { + pop_journal_writer(windex) ; + return 0 ; + } + + /* make sure nobody is trying to flush this one at the same time */ + if (atomic_read(&(jl->j_commit_flushing))) { + sleep_on(&(jl->j_commit_wait)) ; + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1) ; + } + pop_journal_writer(windex) ; + return 0 ; + } + + /* this commit is done, exit */ + if (atomic_read(&(jl->j_commit_left)) <= 0) { + if (flushall) { + 
atomic_set(&(jl->j_older_commits_done), 1) ; + } + pop_journal_writer(windex) ; + return 0 ; + } + atomic_set(&(jl->j_commit_flushing), 1) ; /* keeps end_io from doing anything with commit_bh, and + others from flushing while we are flushing */ + + + if (jl->j_len > JOURNAL_TRANS_MAX) { + reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, list number %d\n", jl->j_len, jl - SB_JOURNAL_LIST(s)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + orig_commit_left = atomic_read(&(jl->j_commit_left)) ; + + /* start by checking all the commit blocks in this transaction. + ** Add anyone not on disk into tbh. Stop checking once commit_left <= 1, because that means we + ** only have the commit block left + */ +retry: + count = 0 ; + for (i = 0 ; atomic_read(&(jl->j_commit_left)) > 1 && i < (jl->j_len + 1) ; i++) { /* everything but commit_bh */ + bn = reiserfs_get_journal_block(s) + (jl->j_start+i) % JOURNAL_BLOCK_COUNT; + tbh = get_hash_table(s->s_dev, bn, s->s_blocksize) ; + +/* kill this sanity check */ +if (count > (orig_commit_left + 2)) { +reiserfs_panic(s, "journal-539: flush_commit_list: BAD count(%d) > orig_commit_left(%d)!\n", count, orig_commit_left) ; +} + if (tbh) { + if (buffer_locked(tbh)) { /* wait on it, redo it just to make sure */ + wait_on_buffer(tbh) ; + if (!buffer_uptodate(tbh)) { + reiserfs_panic(s, "journal-584, buffer write failed\n") ; + } + } + if (buffer_dirty(tbh)) { + printk("journal-569: flush_commit_list, block already dirty!\n") ; + } else { + mark_buffer_dirty(tbh) ; + } + tbh->b_end_io = reiserfs_journal_end_io ; /* not needed */ + ll_rw_block(WRITE, 1, &tbh) ; + count++ ; + atomic_dec(&(tbh->b_count)) ; /* once for our get_hash */ + } + } + + /* wait on everyone in tbh before writing commit block*/ + if (count > 0) { + for (i = 0 ; atomic_read(&(jl->j_commit_left)) > 1 && + i < (jl->j_len + 1) ; i++) { /* everything but commit_bh */ + bn = reiserfs_get_journal_block(s) + (jl->j_start + i) % JOURNAL_BLOCK_COUNT ; + tbh = 
get_hash_table(s->s_dev, bn, s->s_blocksize) ; + + wait_on_buffer(tbh) ; + if (!buffer_uptodate(tbh)) { + reiserfs_panic(s, "journal-601, buffer write failed\n") ; + } + atomic_dec(&(tbh->b_count)) ; /* once for our get_hash */ + brelse(tbh) ; /* once due to original getblk in do_journal_end */ + atomic_dec(&(jl->j_commit_left)) ; + } + } + + if (atomic_read(&(jl->j_commit_left)) != 1) { /* just the commit_bh left, flush it without calling getblk for everyone */ + if (retry_count < 2) { + printk("journal-582: flush_commit_list, not all log blocks on disk yet, trying again\n") ; + retry_count++ ; + goto retry; + } + reiserfs_panic(s, "journal-563: flush_commit_list: BAD, j_commit_left is %lu, should be 1\n", + atomic_read(&(jl->j_commit_left))); + } + + mark_buffer_dirty(jl->j_commit_bh) ; + ll_rw_block(WRITE, 1, &(jl->j_commit_bh)) ; + wait_on_buffer(jl->j_commit_bh) ; + if (!buffer_uptodate(jl->j_commit_bh)) { + reiserfs_panic(s, "journal-615: buffer write failed\n") ; + } + atomic_dec(&(jl->j_commit_left)) ; + brelse(jl->j_commit_bh) ; + + /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ + cleanup_freed_for_journal_list(s, jl) ; + + /* and it is safe to dirty/release all the real buffer heads */ + dirty_journal_list(s, jl) ; + + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1) ; + } + atomic_set(&(jl->j_commit_flushing), 0) ; + wake_up(&(jl->j_commit_wait)) ; + pop_journal_writer(windex) ; + return 0 ; +} + +/* +** flush_journal_list frequently needs to find a newer transaction for a given block. 
This does that, or +** returns NULL if it can't find anything +*/ +static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { + kdev_t dev = cn->dev; + unsigned long blocknr = cn->blocknr ; + + cn = cn->hprev ; + while(cn) { + if (cn->dev == dev && cn->blocknr == blocknr && cn->jlist) { + return cn->jlist ; + } + cn = cn->hprev ; + } + return NULL ; +} + + +/* +** once all the real blocks have been flushed, it is safe to remove them from the +** journal list for this transaction. Aside from freeing the cnode, this also allows the +** block to be reallocated for data blocks if it had been deleted. +*/ +static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, int debug) { + struct buffer_head fake_bh ; + struct reiserfs_journal_cnode *cn, *last ; + int windex = push_journal_writer("remove_all_from_journal_list") ; + cn = jl->j_realblock ; + + /* which is better, to lock once around the whole loop, or + ** to lock for each call to remove_from_journal_list? + */ + while(cn) { + if (cn->blocknr != 0) { + if (debug) { + printk("block %lu, bh is %d, state %d\n", cn->blocknr, cn->bh ? 1: 0, + cn->state) ; + } + fake_bh.b_blocknr = cn->blocknr ; + fake_bh.b_dev = cn->dev ; + cn->state = 0 ; + remove_from_journal_list(p_s_sb, jl, &fake_bh, 1) ; + } + last = cn ; + cn = cn->next ; + free_cnode(p_s_sb, last) ; + } + jl->j_realblock = NULL ; + pop_journal_writer(windex) ; +} + +/* +** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. +** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start +** releasing blocks in this transaction for reuse as data blocks. 
+** called by flush_journal_list, before it calls remove_all_from_journal_list +** +*/ +static int update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { + struct reiserfs_journal_header *jh ; + if (trans_id >= SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) { + if (buffer_locked((SB_JOURNAL(p_s_sb)->j_header_bh))) { + wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { + reiserfs_panic(p_s_sb, "journal-699: buffer write failed\n") ; + } + } + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = trans_id ; + SB_JOURNAL(p_s_sb)->j_first_unflushed_offset = offset ; + jh = (struct reiserfs_journal_header *)(SB_JOURNAL(p_s_sb)->j_header_bh->b_data) ; + jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; + jh->j_first_unflushed_offset = cpu_to_le32(offset) ; + jh->j_mount_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_mount_id) ; + set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ; + ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; + wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { + reiserfs_panic(p_s_sb, "journal-712: buffer write failed\n") ; + } + } + return 0 ; +} + +/* +** returns 1 if all older journal lists have been flushed +*/ +static int older_journal_lists_are_flushed(struct super_block *p_s_sb, unsigned long trans_id) { + int i ; + struct reiserfs_journal_list *jl ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + jl = SB_JOURNAL_LIST(p_s_sb) + i ; + if (jl && jl->j_len > 0 && jl->j_trans_id < trans_id && atomic_read(&(jl->j_nonzerolen)) > 0) { + return 0 ; + } + } + return 1 ; +} + +/* +** flush any and all journal lists older than you are +** can only be called from flush_journal_list +*/ +static int flush_older_journal_lists(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, unsigned long trans_id) { + int i, index ; + struct reiserfs_journal_list *other_jl ; + + index = jl 
- SB_JOURNAL_LIST(p_s_sb) ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + other_jl = SB_JOURNAL_LIST(p_s_sb) + ((index + i) % JOURNAL_LIST_COUNT) ; + if (other_jl && other_jl->j_len > 0 && + other_jl->j_trans_id > 0 && + other_jl->j_trans_id < trans_id && + other_jl != jl) { + /* not old only, not flush all */ + flush_journal_list(p_s_sb, other_jl, 0, 0) ; + } + } + return 0 ; +} + +/* flush a journal list, both commit and real blocks +** set old_only to one if you only want to touch journal_lists that are fully flushed and done with. This allows +** you to free the memory they are using +** +** always set flushall to 1, unless you are flushing all of them, or you are calling from inside +** flush_journal_list +** +** IMPORTANT. This can only be called while there are no journal writers, and the journal is locked. That means +** it can only be called from do_journal_end. If you set old_only, you can call from other places. journal_release +** can call this because there aren't any writers then. 
+*/ +static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int old_only, int flushall) { + struct reiserfs_journal_list *pjl ; + struct reiserfs_journal_cnode *cn, *last ; + int count ; + int was_jwait = 0 ; + int was_dirty = 0 ; + struct buffer_head *saved_bh ; + unsigned long j_len_saved = jl->j_len ; + int windex = push_journal_writer("flush_journal_list") ; + + if (j_len_saved <= 0) { + pop_journal_writer(windex) ; + return 0 ; + } + /* pretest to avoid the locking */ + if (old_only && (atomic_read(&(jl->j_nonzerolen)) > 0 || + atomic_read(&(jl->j_flushing)))) { + pop_journal_writer(windex) ; + return 0 ; + } + while (atomic_read(&(jl->j_commit_flushing)) && !old_only) { /* if someone is getting the commit list, we must wait for them */ + sleep_on(&(jl->j_commit_wait)) ; + } + /* if someone is flushing this list, we must wait for them */ + while (atomic_read(&(jl->j_flushing))) { + sleep_on(&(jl->j_flush_wait)) ; + } + + /* this list is now ours, we can change anything we want */ + atomic_set(&(jl->j_flushing), 1) ; + + count = 0 ; + if (j_len_saved > JOURNAL_TRANS_MAX) { + reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, list number %d\n", j_len_saved, jl - SB_JOURNAL_LIST(s)) ; + atomic_dec(&(jl->j_flushing)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + /* if all the work is already done, get out of here */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { + if (flushall) { + flush_older_journal_lists(s, jl, jl->j_trans_id) ; + } else if (old_only && !older_journal_lists_are_flushed(s, jl->j_trans_id)) { /* only flush if we were called old_only */ + atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + pop_journal_writer(windex) ; + return 0 ; + } + update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % JOURNAL_BLOCK_COUNT, jl->j_trans_id) ; + remove_all_from_journal_list(s, jl, 0) ; + jl->j_len = 0 ; + jl->j_start = 0 ; + jl->j_commit_bh = 
NULL ; + jl->j_trans_id = 0 ; + atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + /* if we were called old_only, we're done. */ + if (old_only) { + atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + /* not old only, start by putting the commit list on disk. This will also flush the commit lists of any + ** olders transactions, which is important + */ + flush_commit_list(s, jl, 1) ; + + /* are we done now? */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { + if (flushall) { + flush_older_journal_lists(s, jl, jl->j_trans_id) ; + } + update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % JOURNAL_BLOCK_COUNT, jl->j_trans_id) ; + remove_all_from_journal_list(s, jl, 0) ; + jl->j_len = 0 ; + jl->j_start = 0 ; + jl->j_commit_bh = NULL ; + jl->j_trans_id = 0 ; + atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + /* loop through each cnode, see if we need to write it, or wait on a more recent transaction, or just ignore it */ + if (atomic_read(&(SB_JOURNAL(s)->j_wcount)) != 0) { + reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ; + } + cn = jl->j_realblock ; + while(cn) { + was_jwait = 0 ; + was_dirty = 0 ; + saved_bh = NULL ; + /* blocknr of 0 is no longer in the hash, ignore it */ + if (cn->blocknr == 0) { + goto free_cnode ; + } + pjl = find_newer_jl_for_cn(cn) ; + /* the order is important here. 
We check pjl to make sure we + ** don't clear BH_JDirty_wait if we aren't the one writing this + ** block to disk + */ + if (!pjl && cn->bh) { + saved_bh = cn->bh ; + + /* we do this to make sure nobody releases the buffer while we are working with it */ + atomic_inc(&(saved_bh->b_count)) ; + + if (buffer_journal_dirty(saved_bh)) { + was_jwait = 1 ; + mark_buffer_notjournal_dirty(saved_bh) ; + atomic_dec(&(saved_bh->b_count)) ; /* brelse the inc from journal_mark_dirty */ + } + if (buffer_dirty(saved_bh)) { + was_dirty = 1 ; + } + } + + /* if someone has this block in a newer transaction, just make + ** sure they are commited, and don't try writing it to disk + */ + if (pjl) { + flush_commit_list(s, pjl, 1) ; + goto free_cnode ; + } + + /* bh == NULL when the block got to disk on its own, OR, the block got freed in a future transaction */ + if (saved_bh == NULL) { + goto free_cnode ; + } + + /* the end_io task might not have run the buffer yet, so it is possible + ** to have jwait buffer that isn't dirty. It is not possible to have + ** a buffer here that isn't mark BH_JDirty_wait + */ + + if ((!was_jwait) && !buffer_locked(saved_bh)) { +printk("journal-813: BAD! buffer %lu %cdirty %cjwait, not in a newer tranasction\n", saved_bh->b_blocknr, + was_dirty ? ' ' : '!', was_jwait ? 
' ' : '!') ; + } + /* if it is locked, we wait on it so the end_io handler does not clobber something we are doing + ** we'll clear it out of the hash at the end with remove_all + */ + if (buffer_locked(saved_bh)) { + wait_on_buffer(saved_bh) ; + if (!buffer_uptodate(saved_bh)) { + reiserfs_panic(s, "journal-923: buffer write failed\n") ; + } + goto free_cnode ; + } else if (buffer_dirty(saved_bh)) { /* it is still dirty, send to disk */ + /* we inc again because saved_bh gets decremented at free_cnode */ + atomic_inc(&(saved_bh->b_count)) ; + set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; + ll_rw_block(WRITE, 1, &saved_bh) ; + count++ ; + } +free_cnode: + last = cn ; + cn = cn->next ; + if (saved_bh) { + atomic_dec(&(saved_bh->b_count)); /* we incremented this to keep others from taking the buffer head away */ + if (atomic_read(&(saved_bh->b_count)) < 0) { + printk("journal-945: saved_bh->b_count < 0") ; + } + } + } + if (count > 0) { + cn = jl->j_realblock ; + while(cn) { + if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { + if (!cn->bh) { + reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ; + } + wait_on_buffer(cn->bh) ; + if (!cn->bh) { + reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; + } + if (!buffer_uptodate(cn->bh)) { + reiserfs_panic(s, "journal-949: buffer write failed\n") ; + } + brelse(cn->bh) ; + } + cn = cn->next ; + } + } + + /* before we can update the journal header block, we _must_ flush all real blocks from all older transactions to disk */ + if (flushall) { + flush_older_journal_lists(s, jl, jl->j_trans_id) ; + } + + /* before we can remove everything from the hash tables for this transaction, we must make sure it can + ** never be replayed + */ + update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % JOURNAL_BLOCK_COUNT, jl->j_trans_id) ; + remove_all_from_journal_list(s, jl, 0) ; + jl->j_len = 0 ; + atomic_set(&(jl->j_nonzerolen), 0) ; + jl->j_start = 0 ; + jl->j_realblock = NULL ; + jl->j_commit_bh = NULL ; + jl->j_trans_id = 0 ; + 
atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + pop_journal_writer(windex) ; + return 0 ; +} + + +/* +** removes any nodes in table with name block and dev as bh. +** only touchs the hnext and hprev pointers. +*/ +void remove_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_list *jl,struct buffer_head *bh, + int remove_freed){ + struct reiserfs_journal_cnode *cur ; + struct reiserfs_journal_cnode **head ; + + if (!bh) + return ; + + head= &(journal_hash(table, bh->b_dev, bh->b_blocknr)) ; + if (!head) { + return ; + } + cur = *head ; + while(cur) { + if (cur->blocknr == bh->b_blocknr && cur->dev == bh->b_dev && (jl == NULL || jl == cur->jlist) && + (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { + if (cur->hnext) { + cur->hnext->hprev = cur->hprev ; + } + if (cur->hprev) { + cur->hprev->hnext = cur->hnext ; + } else { + *head = cur->hnext ; + } + cur->blocknr = 0 ; + cur->dev = 0 ; + cur->state = 0 ; + if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ + atomic_dec(&(cur->jlist->j_nonzerolen)) ; + cur->bh = NULL ; + cur->jlist = NULL ; + } + cur = cur->hnext ; + } +} + +static void free_journal_ram(struct super_block *p_s_sb) { + vfree(SB_JOURNAL(p_s_sb)->j_cnode_free_orig) ; + free_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap) ; + free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ + if (SB_JOURNAL(p_s_sb)->j_header_bh) { + brelse(SB_JOURNAL(p_s_sb)->j_header_bh) ; + } + vfree(SB_JOURNAL(p_s_sb)) ; +} + +/* +** call on unmount. Only set error to 1 if you haven't made your way out +** of read_super() yet. Any other caller must keep error at 0. 
+*/ +static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { + struct reiserfs_transaction_handle myth ; + + /* we only want to flush out transactions if we were called with error == 0 + */ + if (!error) { + /* end the current trans */ + do_journal_end(th, p_s_sb,10, FLUSH_ALL) ; + + /* make sure something gets logged to force our way into the flush code */ + journal_join(&myth, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ; + + /* get any stragglers from the task queue */ + down(&reiserfs_end_io_sem) ; + run_task_queue(&reiserfs_end_io_tq) ; + up(&reiserfs_end_io_sem) ; + } + + /* we decrement before we wake up, because the commit thread dies off + ** when it has been woken up and the count is <= 0 + */ + reiserfs_mounted_fs_count-- ; + wake_up(&reiserfs_commit_thread_wait) ; + sleep_on(&reiserfs_commit_thread_done) ; + + free_journal_ram(p_s_sb) ; + + return 0 ; +} + +/* +** call on unmount. flush all journal trans, release all alloc'd ram +*/ +int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { + return do_journal_release(th, p_s_sb, 0) ; +} +/* +** only call from an error condition inside reiserfs_read_super! +*/ +int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { + return do_journal_release(th, p_s_sb, 1) ; +} + +/* compares description block with commit block. 
returns 1 if they differ, 0 if they are the same */ +static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, + struct reiserfs_journal_commit *commit) { + if (le32_to_cpu(commit->j_trans_id) != le32_to_cpu(desc->j_trans_id) || + le32_to_cpu(commit->j_len) != le32_to_cpu(desc->j_len) || + le32_to_cpu(commit->j_len) > JOURNAL_TRANS_MAX || + le32_to_cpu(commit->j_len) <= 0 + ) { + return 1 ; + } + return 0 ; +} +/* returns 0 if it did not find a description block +** returns -1 if it found a corrupt commit block +** returns 1 if both desc and commit were valid +*/ +static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffer_head *d_bh, unsigned long *oldest_invalid_trans_id, unsigned long *newest_mount_id) { + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + struct buffer_head *c_bh ; + unsigned long offset ; + + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + if (le32_to_cpu(desc->j_len) > 0 && !memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) { + if (oldest_invalid_trans_id && *oldest_invalid_trans_id && le32_to_cpu(desc->j_trans_id) > *oldest_invalid_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction " + "is valid returning because trans_id %d is greater than " + "oldest_invalid %lu\n", le32_to_cpu(desc->j_trans_id), + *oldest_invalid_trans_id); + return 0 ; + } + if (newest_mount_id && *newest_mount_id > le32_to_cpu(desc->j_mount_id)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1087: transaction " + "is valid returning because mount_id %d is less than " + "newest_mount_id %lu\n", desc->j_mount_id, + *newest_mount_id) ; + return -1 ; + } + offset = d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb) ; + + /* ok, we have a journal description block, lets see if the transaction was valid */ + c_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + ((offset + le32_to_cpu(desc->j_len) + 1) % 
JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + if (!c_bh) + return 0 ; + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal_transaction_is_valid, commit offset %ld had bad " + "time %d or length %d\n", + c_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(commit->j_trans_id), + le32_to_cpu(commit->j_len)); + brelse(c_bh) ; + if (oldest_invalid_trans_id) + *oldest_invalid_trans_id = le32_to_cpu(desc->j_trans_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: " + "transaction_is_valid setting oldest invalid trans_id " + "to %d\n", le32_to_cpu(desc->j_trans_id)) ; + return -1; + } + brelse(c_bh) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid " + "transaction start offset %lu, len %d id %d\n", + d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(desc->j_len), le32_to_cpu(desc->j_trans_id)) ; + return 1 ; + } else { + return 0 ; + } +} + +static void brelse_array(struct buffer_head **heads, int num) { + int i ; + for (i = 0 ; i < num ; i++) { + brelse(heads[i]) ; + } +} + +/* +** given the start, and values for the oldest acceptable transactions, +** this either reads in a replays a transaction, or returns because the transaction +** is invalid, or too old. 
+*/ +static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start, + unsigned long oldest_trans_id, unsigned long newest_mount_id) { + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + unsigned long trans_id = 0 ; + struct buffer_head *c_bh ; + struct buffer_head *d_bh ; + struct buffer_head **log_blocks = NULL ; + struct buffer_head **real_blocks = NULL ; + unsigned long trans_offset ; + int i; + + d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ; + if (!d_bh) + return 1 ; + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + trans_offset = d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " + "journal_read_transaction, offset %lu, len %d mount_id %d\n", + d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(desc->j_len), le32_to_cpu(desc->j_mount_id)) ; + if (le32_to_cpu(desc->j_trans_id) < oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " + "journal_read_trans skipping because %lu is too old\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb)) ; + brelse(d_bh) ; + return 1 ; + } + if (le32_to_cpu(desc->j_mount_id) != newest_mount_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " + "journal_read_trans skipping because %d is != " + "newest_mount_id %lu\n", le32_to_cpu(desc->j_mount_id), + newest_mount_id) ; + brelse(d_bh) ; + return 1 ; + } + c_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + ((trans_offset + le32_to_cpu(desc->j_len) + 1) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + if (!c_bh) { + brelse(d_bh) ; + return 1 ; + } + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, " + "commit offset %ld had bad time %d or length %d\n", + c_bh->b_blocknr - 
reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(commit->j_trans_id), le32_to_cpu(commit->j_len)); + brelse(c_bh) ; + brelse(d_bh) ; + return 1; + } + trans_id = le32_to_cpu(desc->j_trans_id) ; + /* now we know we've got a good transaction, and it was inside the valid time ranges */ + log_blocks = kmalloc(le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), GFP_KERNEL) ; + real_blocks = kmalloc(le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), GFP_KERNEL) ; + if (!log_blocks || !real_blocks) { + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + reiserfs_warning("journal-1169: kmalloc failed, unable to mount FS\n") ; + return -1 ; + } + /* get all the buffer heads */ + for(i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + log_blocks[i] = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + (trans_offset + 1 + i) % JOURNAL_BLOCK_COUNT, p_s_sb->s_blocksize); + if (i < JOURNAL_TRANS_HALF) { + real_blocks[i] = getblk(p_s_sb->s_dev, le32_to_cpu(desc->j_realblock[i]), p_s_sb->s_blocksize) ; + } else { + real_blocks[i] = getblk(p_s_sb->s_dev, le32_to_cpu(commit->j_realblock[i - JOURNAL_TRANS_HALF]), p_s_sb->s_blocksize) ; + } + if (real_blocks[i]->b_blocknr >= reiserfs_get_journal_block(p_s_sb) && + real_blocks[i]->b_blocknr < (reiserfs_get_journal_block(p_s_sb)+JOURNAL_BLOCK_COUNT)) { + reiserfs_warning("journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block\n") ; + brelse_array(log_blocks, i) ; + brelse_array(real_blocks, i) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + } + /* read in the log blocks, memcpy to the corresponding real block */ + ll_rw_block(READ, le32_to_cpu(desc->j_len), log_blocks) ; + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + wait_on_buffer(log_blocks[i]) ; + if (!buffer_uptodate(log_blocks[i])) { + reiserfs_warning("journal-1212: REPLAY FAILURE fsck required! 
buffer write failed\n") ; + brelse_array(log_blocks + i, le32_to_cpu(desc->j_len) - i) ; + brelse_array(real_blocks, le32_to_cpu(desc->j_len)) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ; + mark_buffer_uptodate(real_blocks[i], 1) ; + brelse(log_blocks[i]) ; + } + /* flush out the real blocks */ + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + set_bit(BH_Dirty, &(real_blocks[i]->b_state)) ; + ll_rw_block(WRITE, 1, real_blocks + i) ; + } + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + wait_on_buffer(real_blocks[i]) ; + if (!buffer_uptodate(real_blocks[i])) { + reiserfs_warning("journal-1226: REPLAY FAILURE, fsck required! buffer write failed\n") ; + brelse_array(real_blocks + i, le32_to_cpu(desc->j_len) - i) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + brelse(real_blocks[i]) ; + } + cur_dblock = reiserfs_get_journal_block(p_s_sb) + ((trans_offset + le32_to_cpu(desc->j_len) + 2) % JOURNAL_BLOCK_COUNT) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal " + "start to offset %ld\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb)) ; + + /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ + SB_JOURNAL(p_s_sb)->j_start = cur_dblock - reiserfs_get_journal_block(p_s_sb) ; + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = trans_id ; + SB_JOURNAL(p_s_sb)->j_trans_id = trans_id + 1; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return 0 ; +} + +/* +** read and replay the log +** on a clean unmount, the journal header's next unflushed pointer will be to an invalid +** transaction. This tests that before finding all the transactions in the log, whic makes normal mount times fast. 
+** +** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. +** +** On exit, it sets things up so the first transaction will work correctly. +*/ +static int journal_read(struct super_block *p_s_sb) { + struct reiserfs_journal_desc *desc ; + unsigned long last_flush_trans_id = 0 ; + unsigned long oldest_trans_id = 0; + unsigned long oldest_invalid_trans_id = 0 ; + time_t start ; + unsigned long last_flush_start = 0; + unsigned long oldest_start = 0; + unsigned long cur_dblock = 0 ; + unsigned long newest_mount_id = 9 ; + struct buffer_head *d_bh ; + struct reiserfs_journal_header *jh ; + int valid_journal_header = 0 ; + int replay_count = 0 ; + int continue_replay = 1 ; + int ret ; + + cur_dblock = reiserfs_get_journal_block(p_s_sb) ; + printk("reiserfs: checking transaction log (device %s) ...\n", + kdevname(p_s_sb->s_dev)) ; + start = CURRENT_TIME ; + + /* step 1, read in the journal header block. Check the transaction it says + ** is the first unflushed, and if that transaction is not valid, + ** replay is done + */ + SB_JOURNAL(p_s_sb)->j_header_bh = bread(p_s_sb->s_dev, + reiserfs_get_journal_block(p_s_sb) + + JOURNAL_BLOCK_COUNT, + p_s_sb->s_blocksize) ; + if (!SB_JOURNAL(p_s_sb)->j_header_bh) { + return 1 ; + } + jh = (struct reiserfs_journal_header *)(SB_JOURNAL(p_s_sb)->j_header_bh->b_data) ; + if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && + le32_to_cpu(jh->j_first_unflushed_offset) < JOURNAL_BLOCK_COUNT && + le32_to_cpu(jh->j_last_flush_trans_id) > 0) { + last_flush_start = reiserfs_get_journal_block(p_s_sb) + + le32_to_cpu(jh->j_first_unflushed_offset) ; + last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in " + "header: first_unflushed_offset %d, last_flushed_trans_id " + "%lu\n", le32_to_cpu(jh->j_first_unflushed_offset), + last_flush_trans_id) ; + valid_journal_header = 1 ; + + /* now, we try to read 
the first unflushed offset. If it is not valid, + ** there is nothing more we can do, and it makes no sense to read + ** through the whole log. + */ + d_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset), p_s_sb->s_blocksize) ; + ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ; + if (!ret) { + continue_replay = 0 ; + } + brelse(d_bh) ; + } + + /* ok, there are transactions that need to be replayed. start with the first log block, find + ** all the valid transactions, and pick out the oldest. + */ + while(continue_replay && cur_dblock < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT)) { + d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ; + ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; + if (ret == 1) { + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + if (oldest_start == 0) { /* init all oldest_ values */ + oldest_trans_id = le32_to_cpu(desc->j_trans_id) ; + oldest_start = d_bh->b_blocknr ; + newest_mount_id = le32_to_cpu(desc->j_mount_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting " + "oldest_start to offset %lu, trans_id %lu\n", + oldest_start - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + } else if (oldest_trans_id > le32_to_cpu(desc->j_trans_id)) { + /* one we just read was older */ + oldest_trans_id = le32_to_cpu(desc->j_trans_id) ; + oldest_start = d_bh->b_blocknr ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting " + "oldest_start to offset %lu, trans_id %lu\n", + oldest_start - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + } + if (newest_mount_id < le32_to_cpu(desc->j_mount_id)) { + newest_mount_id = le32_to_cpu(desc->j_mount_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " + "newest_mount_id to %d\n", le32_to_cpu(desc->j_mount_id)); + } + cur_dblock += le32_to_cpu(desc->j_len) + 2 ; + } + else { + 
cur_dblock++ ; + } + brelse(d_bh) ; + } + /* step three, starting at the oldest transaction, replay */ + if (last_flush_start > 0) { + oldest_start = last_flush_start ; + oldest_trans_id = last_flush_trans_id ; + } + cur_dblock = oldest_start ; + if (oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay " + "from offset %lu, trans_id %lu\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + + } + replay_count = 0 ; + while(continue_replay && oldest_trans_id > 0) { + ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ; + if (ret < 0) { + return ret ; + } else if (ret != 0) { + break ; + } + cur_dblock = reiserfs_get_journal_block(p_s_sb) + SB_JOURNAL(p_s_sb)->j_start ; + replay_count++ ; + } + + if (oldest_trans_id == 0) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid " + "transactions found\n") ; + } + /* j_start does not get set correctly if we don't replay any transactions. 
+ ** if we had a valid journal_header, set j_start to the first unflushed transaction value, + ** copy the trans_id from the header + */ + if (valid_journal_header && replay_count == 0) { + SB_JOURNAL(p_s_sb)->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ; + SB_JOURNAL(p_s_sb)->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ; + SB_JOURNAL(p_s_sb)->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; + } else { + SB_JOURNAL(p_s_sb)->j_mount_id = newest_mount_id + 1 ; + } + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " + "newest_mount_id to %lu\n", SB_JOURNAL(p_s_sb)->j_mount_id) ; + SB_JOURNAL(p_s_sb)->j_first_unflushed_offset = SB_JOURNAL(p_s_sb)->j_start ; + if (replay_count > 0) { + printk("reiserfs: replayed %d transactions in %lu seconds\n", replay_count, + CURRENT_TIME - start) ; + } + update_journal_header_block(p_s_sb, SB_JOURNAL(p_s_sb)->j_start, SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) ; + return 0 ; +} + + +struct reiserfs_journal_commit_task { + struct super_block *p_s_sb ; + int jindex ; + int wake_on_finish ; /* if this is one, we wake the task_done queue, if it + ** is zero, we free the whole struct on finish + */ + struct reiserfs_journal_commit_task *self ; + struct wait_queue *task_done ; + struct tq_struct task ; +} ; + +static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_task *ct) { + + flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; + kfree(ct->self) ; +} + +static void setup_commit_task_arg(struct reiserfs_journal_commit_task *ct, + struct super_block *p_s_sb, + int jindex) { + if (!ct) { + reiserfs_panic(NULL, "journal-1360: setup_commit_task_arg called with NULL struct\n") ; + } + ct->p_s_sb = p_s_sb ; + ct->jindex = jindex ; + ct->task_done = NULL ; + ct->task.next = NULL ; + ct->task.sync = 0 ; + ct->task.routine = (void *)(void *)reiserfs_journal_commit_task_func ; + 
ct->self = ct ; + ct->task.data = (void *)ct ; +} + +static void commit_flush_async(struct super_block *p_s_sb, int jindex) { + struct reiserfs_journal_commit_task *ct ; + /* using GFP_BUFFER, GFP_KERNEL could try to flush inodes, which will try + ** to start/join a transaction, which will deadlock + */ + ct = kmalloc(sizeof(struct reiserfs_journal_commit_task), GFP_BUFFER) ; + if (ct) { + setup_commit_task_arg(ct, p_s_sb, jindex) ; + queue_task(&(ct->task), &reiserfs_commit_thread_tq); + wake_up(&reiserfs_commit_thread_wait) ; + } else { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning("journal-1540: kmalloc failed, doing sync commit\n") ; +#endif + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; + } +} + +/* +** this is the commit thread. It is started with kernel_thread on +** FS mount, and journal_release() waits for it to exit. +** +** It could do a periodic commit, but there is a lot code for that +** elsewhere right now, and I only wanted to implement this little +** piece for starters. +** +** All we do here is sleep on the j_commit_thread_wait wait queue, and +** then run the per filesystem commit task queue when we wakeup. 
+*/ +static int reiserfs_journal_commit_thread(void *nullp) { + int windex ; + exit_files(current); + exit_mm(current); + + spin_lock_irq(¤t->sigmask_lock); + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + current->session = 1; + current->pgrp = 1; + sprintf(current->comm, "kreiserfsd") ; + lock_kernel() ; + while(1) { + windex = push_journal_writer("commit thread") ; + + while(reiserfs_commit_thread_tq) { + run_task_queue(&reiserfs_commit_thread_tq) ; + } + + /* if there aren't any more filesystems left, break */ + if (reiserfs_mounted_fs_count <= 0) { + run_task_queue(&reiserfs_commit_thread_tq) ; + pop_journal_writer(windex) ; + break ; + } + wake_up(&reiserfs_commit_thread_done) ; + pop_journal_writer(windex) ; + interruptible_sleep_on_timeout(&reiserfs_commit_thread_wait, 5) ; + } + unlock_kernel() ; + wake_up(&reiserfs_commit_thread_done) ; + return 0 ; +} + +static void journal_list_init(struct super_block *p_s_sb) { + int i ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_commit_wait)) ; + init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_flush_wait)) ; + } +} + +/* +** must be called once on fs mount. calls journal_read for you +*/ +int journal_init(struct super_block *p_s_sb) { + int num_cnodes = JOURNAL_BLOCK_COUNT * 2 ; + + if (sizeof(struct reiserfs_journal_commit) != 4096 || + sizeof(struct reiserfs_journal_desc) != 4096 + ) { + printk("journal-1249: commit or desc struct not 4096 %d %d\n", sizeof(struct reiserfs_journal_commit), + sizeof(struct reiserfs_journal_desc)) ; + return 1 ; + } + /* sanity check to make sure they don't overflow the journal */ + if (JOURNAL_BLOCK_COUNT > reiserfs_get_journal_orig_size(p_s_sb)) { + printk("journal-1393: current JOURNAL_BLOCK_COUNT (%d) is too big. 
This FS was created with a journal size of %lu blocks\n", + JOURNAL_BLOCK_COUNT, reiserfs_get_journal_orig_size(p_s_sb)) ; + return 1 ; + } + SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ; + + if (!SB_JOURNAL(p_s_sb)) { + printk("journal-1256: unable to get memory for journal structure\n") ; + return 1 ; + } + memset(SB_JOURNAL(p_s_sb), 0, sizeof(struct reiserfs_journal)) ; + + SB_JOURNAL(p_s_sb)->j_list_bitmap_index = 0 ; + SB_JOURNAL_LIST_INDEX(p_s_sb) = -10000 ; /* make sure flush_old_commits does not try to flush a list while replay is on */ + + /* clear out the journal list array */ + memset(SB_JOURNAL_LIST(p_s_sb), 0, sizeof(struct reiserfs_journal_list) * JOURNAL_LIST_COUNT) ; + journal_list_init(p_s_sb) ; + + memset(SB_JOURNAL(p_s_sb)->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; + memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */ + + INIT_LIST_HEAD(&SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ; + reiserfs_allocate_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap, + SB_BMAP_NR(p_s_sb)) ; + allocate_bitmap_nodes(p_s_sb) ; + + SB_JOURNAL(p_s_sb)->j_start = 0 ; + SB_JOURNAL(p_s_sb)->j_len = 0 ; + SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wcount), 0) ; + SB_JOURNAL(p_s_sb)->j_bcount = 0 ; + SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ; + SB_JOURNAL(p_s_sb)->j_last = NULL ; + SB_JOURNAL(p_s_sb)->j_first = NULL ; + init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_wait)) ; + + SB_JOURNAL(p_s_sb)->j_trans_id = 10 ; + SB_JOURNAL(p_s_sb)->j_mount_id = 10 ; + SB_JOURNAL(p_s_sb)->j_state = 0 ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 0) ; + SB_JOURNAL(p_s_sb)->j_cnode_free_list = allocate_cnodes(num_cnodes) ; + SB_JOURNAL(p_s_sb)->j_cnode_free_orig = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + SB_JOURNAL(p_s_sb)->j_cnode_free = 
SB_JOURNAL(p_s_sb)->j_cnode_free_list ? num_cnodes : 0 ; + SB_JOURNAL(p_s_sb)->j_cnode_used = 0 ; + SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; + init_journal_hash(p_s_sb) ; + SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb)) ; + if (!(SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap)) { + reiserfs_warning("journal-2005, get_list_bitmap failed for journal list 0\n") ; + return 1 ; + } + if (journal_read(p_s_sb) < 0) { + reiserfs_warning("Replay Failure, unable to mount\n") ; + free_journal_ram(p_s_sb) ; + return 1 ; + } + SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; /* once the read is done, we can set this where it belongs */ + + if (reiserfs_dont_log (p_s_sb)) + return 0; + + reiserfs_mounted_fs_count++ ; + if (reiserfs_mounted_fs_count <= 1) { + kernel_thread((void *)(void *)reiserfs_journal_commit_thread, NULL, + CLONE_FS | CLONE_FILES | CLONE_VM) ; + } + return 0 ; +} + +/* +** test for a polite end of the current transaction. Used by file_write, and should +** be used by delete to make sure they don't write more than can fit inside a single +** transaction +*/ +int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { + time_t now = CURRENT_TIME ; + if (reiserfs_dont_log(th->t_super)) + return 0 ; + if ( SB_JOURNAL(th->t_super)->j_must_wait > 0 || + (SB_JOURNAL(th->t_super)->j_len_alloc + new_alloc) >= JOURNAL_MAX_BATCH || + atomic_read(&(SB_JOURNAL(th->t_super)->j_jlock)) || + (now - SB_JOURNAL(th->t_super)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE || + SB_JOURNAL(th->t_super)->j_cnode_free < (JOURNAL_TRANS_MAX * 3)) { + return 1 ; + } + return 0 ; +} + +/* join == true if you must join an existing transaction. +** join == false if you can deal with waiting for others to finish +** +** this will block until the transaction is joinable. send the number of blocks you +** expect to use in nblocks. 
+*/ +static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { + time_t now = CURRENT_TIME ; + int windex ; + int old_trans_id ; + /* int free_some_ram = 0 ; -Hans */ + + reiserfs_check_lock_depth("journal_begin") ; + + if (reiserfs_dont_log(p_s_sb)) { + th->t_super = p_s_sb ; /* others will check this for the don't log flag */ + return 0 ; + } + + /* we never want to make kswapd wait, never, ever. So, the only time + ** we make it wait is when joining the transaction would result in + ** overflow. + ** + ** ugly, nasty stuff, we need real callbacks from the VFS layer to do + ** this right. + */ + if (!strcmp(current->comm, "kswapd")) { + /* + SB_JOURNAL(p_s_sb)->j_must_wait = 1 ; + flush_async_commits(p_s_sb) ; + */ + if ((SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) < JOURNAL_MAX_BATCH) { + join = 1 ; + } + } + + lock_journal(p_s_sb) ; + + /* if there is no room in the journal OR + ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning + ** we don't sleep if there aren't other writers + */ + + if ( (!join && SB_JOURNAL(p_s_sb)->j_must_wait > 0) || + ( !join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= JOURNAL_MAX_BATCH) || + (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0 && SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && + (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) || + (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) ) || + (!join && SB_JOURNAL(p_s_sb)->j_cnode_free < (JOURNAL_TRANS_MAX * 3))) { + + unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ + + /* if writer count is 0, we can just force this transaction to end, and start + ** a new one afterwards. 
+ */ + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { + struct reiserfs_transaction_handle myth ; + journal_join(&myth, p_s_sb, 1) ; + windex = push_journal_writer("journal_begin") ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + pop_journal_writer(windex) ; + do_journal_end(&myth, p_s_sb,1,COMMIT_NOW) ; + } else { + /* but if the writer count isn't zero, we have to wait for the current writers to finish. + ** They won't batch on transaction end once we set j_jlock + */ + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; + old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && + SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + } + lock_journal(p_s_sb) ; /* relock to continue */ + } + + if (SB_JOURNAL(p_s_sb)->j_trans_start_time == 0) { /* we are the first writer, set trans_id */ + SB_JOURNAL(p_s_sb)->j_trans_start_time = now ; + } + atomic_inc(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + SB_JOURNAL(p_s_sb)->j_len_alloc += nblocks ; + th->t_blocks_logged = 0 ; + th->t_blocks_allocated = nblocks ; + th->t_super = p_s_sb ; + th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + th->t_caller = "Unknown" ; + unlock_journal(p_s_sb) ; + p_s_sb->s_dirt = 1; + return 0 ; +} + + +int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_begin_r(th, p_s_sb, nblocks, 1) ; +} + +int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { + return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; +} + +/* not used at all */ +int journal_prepare(struct super_block * p_s_sb, struct buffer_head *bh) { + return 0 ; +} + +/* +** puts bh into the current transaction. 
If it was already there, reorders removes the +** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). +** +** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the +** transaction is committed. +** +** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. +*/ +int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { + struct reiserfs_journal_cnode *cn = NULL; + int count_already_incd = 0 ; + int windex ; + int prepared = 0 ; + + if (reiserfs_dont_log(th->t_super)) { + mark_buffer_dirty(bh) ; + return 0 ; + } + + windex = push_journal_writer("journal_mark_dirty") ; + if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { + reiserfs_panic(th->t_super, "journal-1577: handle trans id %d != current trans id %d\n", + th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); + } + p_s_sb->s_dirt = 1 ; + + prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ; + /* already in this transaction, we are done */ + if (buffer_journaled(bh)) { + pop_journal_writer(windex) ; + return 0 ; + } + +#if 0 + /* this must be turned into a panic instead of a warning. We can't allow + ** a dirty or journal_dirty or locked buffer to be logged, as some changes + ** could get to disk too early. NOT GOOD. + */ + if (!prepared || buffer_locked(bh) || buffer_dirty(bh)) { + printk("journal-1777: buffer %lu bad state %cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT\n", bh->b_blocknr, prepared ? ' ' : '!', + buffer_locked(bh) ? ' ' : '!', + buffer_dirty(bh) ? ' ' : '!', + buffer_journal_dirty(bh) ? 
' ' : '!') ; + show_reiserfs_locks() ; + } +#endif + count_already_incd = clear_prepared_bits(bh) ; + + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { + printk("journal-1409: journal_mark_dirty returning because j_wcount was %d\n", atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount))) ; + pop_journal_writer(windex) ; + return 1 ; + } + /* this error means I've screwed up, and we've overflowed the transaction. + ** Nothing can be done here, except make the FS readonly or panic. + */ + if (SB_JOURNAL(p_s_sb)->j_len >= JOURNAL_TRANS_MAX) { + reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", SB_JOURNAL(p_s_sb)->j_len) ; + } + + if (buffer_journal_dirty(bh)) { + count_already_incd = 1 ; + mark_buffer_notjournal_dirty(bh) ; + } + + if (buffer_dirty(bh)) { + clear_bit(BH_Dirty, &bh->b_state) ; + } + + if (buffer_journaled(bh)) { /* must double check after getting lock */ + goto done ; + } + + if (SB_JOURNAL(p_s_sb)->j_len > SB_JOURNAL(p_s_sb)->j_len_alloc) { + SB_JOURNAL(p_s_sb)->j_len_alloc = SB_JOURNAL(p_s_sb)->j_len + JOURNAL_PER_BALANCE_CNT ; + } + + set_bit(BH_JDirty, &bh->b_state) ; + + /* now put this guy on the end */ + if (!cn) { + cn = get_cnode(p_s_sb) ; + if (!cn) { + reiserfs_panic(p_s_sb, "get_cnode failed!\n"); + } + + if (th->t_blocks_logged == th->t_blocks_allocated) { + th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ; + SB_JOURNAL(p_s_sb)->j_len_alloc += JOURNAL_PER_BALANCE_CNT ; + } + th->t_blocks_logged++ ; + SB_JOURNAL(p_s_sb)->j_len++ ; + + cn->bh = bh ; + cn->blocknr = bh->b_blocknr ; + cn->dev = bh->b_dev ; + cn->jlist = NULL ; + insert_journal_hash(SB_JOURNAL(p_s_sb)->j_hash_table, cn) ; + if (!count_already_incd) { + atomic_inc(&(bh->b_count)) ; + } + } + cn->next = NULL ; + cn->prev = SB_JOURNAL(p_s_sb)->j_last ; + cn->bh = bh ; + if (SB_JOURNAL(p_s_sb)->j_last) { + SB_JOURNAL(p_s_sb)->j_last->next = cn ; + SB_JOURNAL(p_s_sb)->j_last = cn ; + } else { + SB_JOURNAL(p_s_sb)->j_first = cn ; + 
SB_JOURNAL(p_s_sb)->j_last = cn ; + } +done: + pop_journal_writer(windex) ; + return 0 ; +} + +/* +** if buffer already in current transaction, do a journal_mark_dirty +** otherwise, just mark it dirty and move on. Used for writes to meta blocks +** that don't need journaling +*/ +int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { + int windex = push_journal_writer("dirty_nolog") ; + if (reiserfs_dont_log(th->t_super) || buffer_journaled(bh) || + buffer_journal_dirty(bh)) { + pop_journal_writer(windex) ; + return journal_mark_dirty(th, p_s_sb, bh) ; + } + if (get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, bh->b_dev,bh->b_blocknr,bh->b_size)) { + pop_journal_writer(windex) ; + return journal_mark_dirty(th, p_s_sb, bh) ; + } + mark_buffer_dirty(bh) ; + pop_journal_writer(windex) ; + return 0 ; +} + +int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_end(th, p_s_sb, nblocks, 0) ; +} + +/* removes from the current transaction, relsing and descrementing any counters. +** also files the removed buffer directly onto the clean list +** +** called by journal_mark_freed when a block has been deleted +** +** returns 1 if it cleaned and relsed the buffer. 
0 otherwise +*/ +int remove_from_transaction(struct super_block *p_s_sb, unsigned long blocknr, int already_cleaned) { + struct buffer_head *bh ; + struct reiserfs_journal_cnode *cn ; + int ret = 0; + + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + if (!cn || !cn->bh) { + return ret ; + } + bh = cn->bh ; + if (cn->prev) { + cn->prev->next = cn->next ; + } + if (cn->next) { + cn->next->prev = cn->prev ; + } + if (cn == SB_JOURNAL(p_s_sb)->j_first) { + SB_JOURNAL(p_s_sb)->j_first = cn->next ; + } + if (cn == SB_JOURNAL(p_s_sb)->j_last) { + SB_JOURNAL(p_s_sb)->j_last = cn->prev ; + } + remove_journal_hash(SB_JOURNAL(p_s_sb)->j_hash_table, NULL, bh, 0) ; + mark_buffer_not_journaled(bh) ; /* don't log this one */ + + if (!already_cleaned) { + mark_buffer_notjournal_dirty(bh) ; + atomic_dec(&(bh->b_count)) ; + if (atomic_read(&(bh->b_count)) < 0) { + printk("journal-1752: remove from trans, b_count < 0\n") ; + } + if (!buffer_locked(bh)) reiserfs_clean_and_file_buffer(bh) ; + ret = 1 ; + } + SB_JOURNAL(p_s_sb)->j_len-- ; + SB_JOURNAL(p_s_sb)->j_len_alloc-- ; + free_cnode(p_s_sb, cn) ; + return ret ; +} + +/* removes from a specific journal list hash */ +int remove_from_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_head *bh, int remove_freed) { + remove_journal_hash(SB_JOURNAL(s)->j_list_hash_table, jl, bh, remove_freed) ; + return 0 ; +} + +/* +** for any cnode in a journal list, it can only be dirtied of all the +** transactions that include it are commited to disk. 
+** this checks through each transaction, and returns 1 if you are allowed to dirty, +** and 0 if you aren't +** +** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log +** blocks for a given transaction on disk +** +*/ +static int can_dirty(struct reiserfs_journal_cnode *cn) { + kdev_t dev = cn->dev ; + unsigned long blocknr = cn->blocknr ; + struct reiserfs_journal_cnode *cur = cn->hprev ; + int can_dirty = 1 ; + + /* first test backwards */ + while(cur && can_dirty) { + if (cur->jlist && cur->jlist->j_len > 0 && atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && cur->blocknr && + cur->dev == dev && cur->blocknr == blocknr + ) { + can_dirty = 0 ; + } + cur = cur->hprev ; + } + /* then test forwards */ + cur = cn->hnext ; + while(cur && can_dirty) { + if (cur->jlist && cur->jlist->j_len > 0 && atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && cur->blocknr && + cur->dev == dev && cur->blocknr == blocknr + ) { + can_dirty = 0 ; + } + cur = cur->hnext ; + } + return can_dirty ; +} + +/* +** Wherever possible, this dirties and releases the real blocks associated +** with a transaction +** +** called by flush_commit_list, after all the log blocks for a transaction +** are on disk. 
+*/ +static void dirty_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + struct buffer_head *tbh ; + struct reiserfs_journal_cnode *cn ; + int dirty_it ; + int windex = push_journal_writer("dirty_journal_list") ; + + cn = jl->j_realblock ; + + while(cn) { + /* remove_from_journal_list invalidates the bh in j_realblock, must + ** copy it first */ + tbh = cn->bh ; + + /* default to dirty the block */ + dirty_it = 1 ; + if (cn->blocknr && tbh) { + /* we only want to dirty the block if all the log blocks in all + his transactions are on disk */ + dirty_it = can_dirty(cn) ; + if (dirty_it) { + if (buffer_journal_dirty(tbh)) { + if (test_bit(BH_JPrepared, &tbh->b_state)) { + set_bit(BH_JRestore_dirty, &tbh->b_state) ; + } else { + mark_buffer_dirty(tbh) ; + } + } + } + } + cn = cn->next ; + } + pop_journal_writer(windex) ; +} + +/* syncs the commit blocks, but does not force the real buffers to disk +** will wait until the current transaction is done/commited before returning +*/ +int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT) ; +} + +#ifdef __KERNEL__ +int show_reiserfs_locks(void) { + + dump_journal_writers() ; +#if 0 /* debugging code for when we are compiled static don't delete */ + p_s_sb = sb_entry(super_blocks.next); + while (p_s_sb != sb_entry(&super_blocks)) { + if (reiserfs_is_super(p_s_sb)) { +printk("journal lock is %d, join lock is %d, writers %d must wait is %d\n", + atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)), + atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)), + atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)), + SB_JOURNAL(p_s_sb)->j_must_wait) ; + printk("used cnodes %d, free cnodes %d\n", SB_JOURNAL(p_s_sb)->j_cnode_used, SB_JOURNAL(p_s_sb)->j_cnode_free) ; + } + p_s_sb = sb_entry(p_s_sb->s_list.next); + } +#endif + return 0 ; +} +#endif + +/* +** used to get memory back from async commits that are 
floating around +** and to reclaim any blocks deleted but unusable because their commits +** haven't hit disk yet. called from bitmap.c +** +** if it starts flushing things, it ors SCHEDULE_OCCURRED into repeat. +** note, this is just if schedule has a chance of occuring. I need to +** change flush_commit_lists to have a repeat parameter too. +** +*/ +void flush_async_commits(struct super_block *p_s_sb) { + int i ; + int windex ; + + windex = push_journal_writer("flush_async_commits") ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; + } + } + pop_journal_writer(windex) ; +} + +/* +** flushes any old transactions to disk +** ends the current transaction if it is too old +** +** also calls flush_journal_list with old_only == 1, which allows me to reclaim +** memory and such from the journal lists whose real blocks are all on disk. +** +** called by sync_dev_journal from buffer.c +*/ +int flush_old_commits(struct super_block *p_s_sb, int immediate) { + int i ; + int count = 0; + int start ; + time_t now ; + int windex ; + int keep_dirty = 0 ; + struct reiserfs_transaction_handle th ; + + start = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + now = CURRENT_TIME ; + + /* safety check so we don't flush while we are replaying the log during mount */ + if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) { + return 0 ; + } + if (!strcmp(current->comm, "kupdate")) { + immediate = 0 ; + keep_dirty = 1 ; + } + /* starting with oldest, loop until we get to the start */ + i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; + while(i != start) { + if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE || + immediate)) { + /* we have to check again to be sure the current transaction did not change */ + if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; + } + } + /* now we free 
ram used by the old journal lists */ + if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1, 0) ; /* old_only, and don't flush all, + we only want to reclaim nodes if it will be fast */ + } + i = (i + 1) % JOURNAL_LIST_COUNT ; + count++ ; + } + /* now, check the current transaction. If there are no writers, and it is too old, finish it, and + ** force the commit blocks to disk + */ + if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && + SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && + SB_JOURNAL(p_s_sb)->j_len > 0 && + (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) { +/* +printk("journal-1743: current trans is too old (%lu seconds)...flushing (immediate was %d)\n", + now - SB_JOURNAL(p_s_sb)->j_trans_start_time, immediate) ; +*/ + journal_join(&th, p_s_sb, 1) ; + windex = push_journal_writer("flush_old_commits") ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + pop_journal_writer(windex) ; + do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ; + keep_dirty = 0 ; + } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to + flush, we must be sure old transactions hit the disk too. */ + journal_join(&th, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; + } + return keep_dirty ; +} + +/* +** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit +** +** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all +** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just +** flushes the commit list and returns 0. 
+** +** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. +** +** Note, we can't allow the journal_end to proceed while there are still writers in the log. +*/ +static int check_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, + unsigned long nblocks, int flags) { + + time_t now ; + int flush = flags & FLUSH_ALL ; + int commit_now = flags & COMMIT_NOW ; + int wait_on_commit = flags & WAIT ; + + if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { + reiserfs_panic(th->t_super, "journal-1577: handle trans id %d != current trans id %d\n", + th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); + } + + SB_JOURNAL(p_s_sb)->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ; + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + } + + /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released + ** will be dealt with by next transaction that actually writes something, but should be taken + ** care of in this trans + */ + if (SB_JOURNAL(p_s_sb)->j_len == 0) { + int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + unlock_journal(p_s_sb) ; + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + return 0 ; + } + /* if wcount > 0, and we are called to with flush or commit_now, + ** we wait on j_join_wait. We will wake up when the last writer has + ** finished the transaction, and started it on its way to the disk. + ** Then, we flush the commit or journal list, and just return 0 + ** because the rest of journal end was already done for this transaction. 
+ */ + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) { + if (flush || commit_now) { + int orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; + if (flush) { + SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ; + } + unlock_journal(p_s_sb) ; + /* sleep while the current transaction is still j_jlocked */ + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && + SB_JOURNAL(p_s_sb)->j_trans_id == th->t_trans_id) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + if (commit_now) { + if (wait_on_commit) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + } else { + commit_flush_async(p_s_sb, orig_jindex) ; + } + } + return 0 ; + } + unlock_journal(p_s_sb) ; + return 0 ; + } + + /* deal with old transactions where we are the last writers */ + now = CURRENT_TIME ; + if ((now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) { + commit_now = 1 ; + SB_JOURNAL(p_s_sb)->j_next_async_flush = 1 ; + } + /* don't batch when someone is waiting on j_join_wait */ + /* don't batch when syncing the commit or flushing the whole trans */ + if (!(SB_JOURNAL(p_s_sb)->j_must_wait > 0) && !(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock))) && !flush && !commit_now && + (SB_JOURNAL(p_s_sb)->j_len < JOURNAL_MAX_BATCH) && + SB_JOURNAL(p_s_sb)->j_len_alloc < JOURNAL_MAX_BATCH && SB_JOURNAL(p_s_sb)->j_cnode_free > (JOURNAL_TRANS_MAX * 3)) { + SB_JOURNAL(p_s_sb)->j_bcount++ ; + unlock_journal(p_s_sb) ; + return 0 ; + } + + if (SB_JOURNAL(p_s_sb)->j_start > JOURNAL_BLOCK_COUNT) { + reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%d) is too high\n", SB_JOURNAL(p_s_sb)->j_start) ; + } + return 1 ; +} + +/* +** Does all the work that makes deleting blocks safe. +** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. +** +** otherwise: +** set a bit for the block in the journal bitmap. 
That will prevent it from being allocated for unformatted nodes +** before this transaction has finished. +** +** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with +** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, +** the block can't be reallocated yet. +** +** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. +*/ +int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long blocknr) { + struct reiserfs_journal_cnode *cn = NULL ; + struct buffer_head *bh = NULL ; + struct reiserfs_list_bitmap *jb = NULL ; + int cleaned = 0 ; + int windex = push_journal_writer("journal_mark_freed") ; + + if (reiserfs_dont_log(th->t_super)) { + bh = get_hash_table(p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + if (bh && buffer_dirty (bh)) { + printk ("journal_mark_freed(dont_log): dirty buffer on hash list: %lx %ld\n", bh->b_state, blocknr); + BUG (); + } + brelse (bh); + pop_journal_writer(windex) ; + return 0 ; + } + bh = get_hash_table(p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + /* if it is journal new, we just remove it from this transaction */ + if (bh && buffer_journal_new(bh)) { + clear_prepared_bits(bh) ; + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; + } else { + /* set the bit for this block in the journal bitmap for this transaction */ + jb = SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap ; + if (!jb) { + reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; + } + set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; + + /* Note, the entire while loop is not allowed to schedule. 
*/ + + if (bh) { + clear_prepared_bits(bh) ; + } + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; + + /* find all older transactions with this block, make sure they don't try to write it out */ + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + while (cn) { + if (p_s_sb->s_dev == cn->dev && blocknr == cn->blocknr) { + set_bit(BLOCK_FREED, &cn->state) ; + if (cn->bh) { + if (!cleaned) { + /* remove_from_transaction will brelse the buffer if it was + ** in the current trans + */ + mark_buffer_notjournal_dirty(cn->bh) ; + if (!buffer_locked(cn->bh)) { + reiserfs_clean_and_file_buffer(cn->bh) ; + } + cleaned = 1 ; + atomic_dec(&(cn->bh->b_count)) ; + if (atomic_read(&(cn->bh->b_count)) < 0) { + printk("journal-2138: cn->bh->b_count < 0\n") ; + } + } + if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ + atomic_dec(&(cn->jlist->j_nonzerolen)) ; + } + cn->bh = NULL ; + } + } + cn = cn->hnext ; + } + } + + if (bh) { + atomic_dec(&(bh->b_count)) ; /* get_hash incs this */ + if (atomic_read(&(bh->b_count)) < 0) { + printk("journal-2165: bh->b_count < 0\n") ; + } + } + pop_journal_writer(windex) ; + return 0 ; +} + +void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, + struct buffer_head *bh) { + if (reiserfs_dont_log (p_s_sb)) + return; + + if (!bh) { + return ; + } + if (test_and_clear_bit(BH_JPrepared, &bh->b_state)) { + if (test_and_clear_bit(BH_JRestore_dirty, &bh->b_state)) { + if (!buffer_journaled(bh)) { + mark_buffer_dirty(bh) ; + } + } + } +} + +extern struct tree_balance *cur_tb ; +/* +** before we can change a metadata block, we have to make sure it won't +** be written to disk while we are altering it. So, we must: +** clean it +** wait on it. 
+** +*/ +void reiserfs_prepare_for_journal(struct super_block *p_s_sb, + struct buffer_head *bh, int wait) { + int windex ; + int retry_count = 0 ; + + if (reiserfs_dont_log (p_s_sb)) + return; + + windex = push_journal_writer("prepare_for_journal") ; + while(!test_bit(BH_JPrepared, &bh->b_state) || + (wait && buffer_locked(bh))) { + if (buffer_journaled(bh)) { + set_bit(BH_JPrepared, &bh->b_state) ; + pop_journal_writer(windex) ; + return ; + } + set_bit(BH_JPrepared, &bh->b_state) ; + if (test_and_clear_bit(BH_Dirty, &bh->b_state)) { + set_bit(BH_JRestore_dirty, &bh->b_state) ; + } + if (wait) { +#ifdef CONFIG_REISERFS_CHECK + if (buffer_locked(bh) && cur_tb != NULL) { + printk("reiserfs_prepare_for_journal, waiting while do_balance was running\n") ; + BUG() ; + } +#endif + wait_on_buffer(bh) ; + } + retry_count++ ; + } + pop_journal_writer(windex) ; +} + +/* + * Wait for a page to get unlocked. + * + * This must be called with the caller "holding" the page, + * ie with increased "page->count" so that the page won't + * go away during the wait.. + */ +static void ___reiserfs_wait_on_page(struct reiserfs_page_list *pl) +{ + struct task_struct *tsk = current; + struct page *page = pl->page ; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&page->wait, &wait); + do { + block_sync_page(page); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!PageLocked(page) || pl->do_not_lock) + break; + schedule(); + } while (PageLocked(page)); + tsk->state = TASK_RUNNING; + remove_wait_queue(&page->wait, &wait); +} + +/* + * Get an exclusive lock on the page.. + * but, every time you get woken up, check the page to make sure + * someone hasn't called a journal_begin with it locked. + * + * the page should always be locked when this returns + * + * returns 0 if you've got the page locked + * returns 1 if it returns because someone else has called journal_begin + * with the page locked + * this is only useful to the code that flushes pages before a + * commit. 
Do not export this hack. Ever. + */ +static int reiserfs_try_lock_page(struct reiserfs_page_list *pl) +{ + struct page *page = pl->page ; + while (TryLockPage(page)) { + if (pl->do_not_lock) { + /* the page is locked, but we cannot have it */ + return 1 ; + } + ___reiserfs_wait_on_page(pl); + } + /* we have the page locked */ + return 0 ; +} + + +/* +** This can only be called from do_journal_end. +** it runs through the list things that need flushing before the +** transaction can commit, and writes each of them to disk +** +*/ + +static void flush_pages_before_commit(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb) { + struct reiserfs_page_list *pl = SB_JOURNAL(p_s_sb)->j_flush_pages ; + struct reiserfs_page_list *pl_tmp ; + struct buffer_head *bh, *head ; + int count = 0 ; + + /* first write each dirty unlocked buffer in the list */ + + while(pl) { + /* ugly. journal_end can be called from get_block, which has a + ** page locked. So, we have to check to see if pl->page is the page + ** currently locked by the calling function, and if so, skip the + ** lock + */ + if (reiserfs_try_lock_page(pl)) { + goto setup_next ; + } + if (!PageLocked(pl->page)) { + BUG() ; + } + if (pl->page->buffers) { + head = pl->page->buffers ; + bh = head ; + do { + if (bh->b_blocknr == pl->blocknr && buffer_dirty(bh) && + !buffer_locked(bh) && buffer_uptodate(bh) ) { + bh->b_end_io = reiserfs_journal_end_io ; + ll_rw_block(WRITE, 1, &bh) ; + } + bh = bh->b_this_page ; + } while (bh != head) ; + } + if (!pl->do_not_lock) { + UnlockPage(pl->page) ; + } +setup_next: + pl = pl->next ; + } + + /* now wait on them */ + + pl = SB_JOURNAL(p_s_sb)->j_flush_pages ; + while(pl) { + if (reiserfs_try_lock_page(pl)) { + goto remove_page ; + } + if (!PageLocked(pl->page)) { + BUG() ; + } + if (pl->page->buffers) { + head = pl->page->buffers ; + bh = head ; + do { + if (bh->b_blocknr == pl->blocknr) { + count++ ; + wait_on_buffer(bh) ; + if (!buffer_uptodate(bh)) { + 
reiserfs_panic(p_s_sb, "journal-2443: flush_pages_before_commit, error writing block %lu\n", bh->b_blocknr) ; + } + } + bh = bh->b_this_page ; + } while (bh != head) ; + } + if (!pl->do_not_lock) { + UnlockPage(pl->page) ; + } +remove_page: + /* we've waited on the I/O, we can remove the page from the + ** list, and free our pointer struct to it. + */ + if (pl->prev) { + pl->prev->next = pl->next ; + } + if (pl->next) { + pl->next->prev = pl->prev ; + } + put_page(pl->page) ; + pl_tmp = pl ; + pl = pl->next ; + reiserfs_kfree(pl_tmp, sizeof(struct reiserfs_page_list), p_s_sb) ; + } + SB_JOURNAL(p_s_sb)->j_flush_pages = NULL ; +} + +/* +** called when a indirect item is converted back into a tail. +** +** The reiserfs part of the inode stores enough information to find +** our page_list struct in the flush list. We remove it from the list +** and free the struct. +** +** Note, it is possible for this to happen: +** +** reiserfs_add_page_to_flush_list(inode) +** transaction ends, list is flushed +** reiserfs_remove_page_from_flush_list(inode) +** +** This would be bad because the page_list pointer in the inode is not +** updated when the list is flushed, so we can't know if the pointer is +** valid. So, in the inode, we also store the transaction id when the +** page was added. If we are trying to remove something from an old +** transaction, we just clear out the pointer in the inode and return. +** +** Normal case is to use the reiserfs_page_list pointer in the inode to +** find and remove the page from the flush list. +*/ +int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *th, + struct inode *inode) { + struct reiserfs_page_list *pl ; + + /* was this conversion done in a previous transaction? 
If so, return */ + if (inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) { + inode->u.reiserfs_i.i_converted_page = NULL ; + inode->u.reiserfs_i.i_conversion_trans_id = 0 ; + return 0 ; + } + + /* remove the page_list struct from the list, release our hold on the + ** page, and free the page_list struct + */ + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl) { + if (pl->next) { + pl->next->prev = pl->prev ; + } + if (pl->prev) { + pl->prev->next = pl->next ; + } + if (SB_JOURNAL(inode->i_sb)->j_flush_pages == pl) { + SB_JOURNAL(inode->i_sb)->j_flush_pages = pl->next ; + } + put_page(pl->page) ; + reiserfs_kfree(pl, sizeof(struct reiserfs_page_list), inode->i_sb) ; + inode->u.reiserfs_i.i_converted_page = NULL ; + inode->u.reiserfs_i.i_conversion_trans_id = 0 ; + } + return 0 ; +} + +/* +** Called after a direct to indirect transaction. The unformatted node +** must be flushed to disk before the transaction commits, otherwise, we +** risk losing the data from the direct item. This adds the page +** containing the unformatted node to a list of pages that need flushing. +** +** it calls get_page(page), so the page won't disappear until we've +** flushed or removed it from our list. +** +** pointers to the reiserfs_page_list struct are stored in the inode, +** so this page can be quickly removed from the list after the tail is +** converted back into a direct item. +** +** If we fail to find the memory for the reiserfs_page_list struct, we +** just sync the page now. Not good, but safe. +** +** since this must be called with the page locked, we always set +** the do_not_lock field in the page_list struct we allocate +** +*/ +int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct buffer_head *bh) { + struct reiserfs_page_list *new_pl ; + +/* debugging use ONLY. Do not define this on data you care about. 
*/ +#ifdef REISERFS_NO_FLUSH_AFTER_CONVERT + return 0 ; +#endif + + get_page(bh->b_page) ; + new_pl = reiserfs_kmalloc(sizeof(struct reiserfs_page_list), GFP_BUFFER, + inode->i_sb) ; + if (!new_pl) { + put_page(bh->b_page) ; + reiserfs_warning("journal-2480: forced to flush page, out of memory\n") ; + ll_rw_block(WRITE, 1, &bh) ; + wait_on_buffer(bh) ; + if (!buffer_uptodate(bh)) { + reiserfs_panic(inode->i_sb, "journal-2484: error writing buffer %lu to disk\n", bh->b_blocknr) ; + } + inode->u.reiserfs_i.i_converted_page = NULL ; + return 0 ; + } + + new_pl->page = bh->b_page ; + new_pl->do_not_lock = 1 ; + new_pl->blocknr = bh->b_blocknr ; + new_pl->next = SB_JOURNAL(inode->i_sb)->j_flush_pages; + if (new_pl->next) { + new_pl->next->prev = new_pl ; + } + new_pl->prev = NULL ; + SB_JOURNAL(inode->i_sb)->j_flush_pages = new_pl ; + + /* if we have numbers from an old transaction, zero the converted + ** page, it has already been flushed and freed + */ + if (inode->u.reiserfs_i.i_conversion_trans_id && + inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) { + inode->u.reiserfs_i.i_converted_page = NULL ; + } + if (inode->u.reiserfs_i.i_converted_page) { + reiserfs_panic(inode->i_sb, "journal-2501: inode already had a converted page\n") ; + } + inode->u.reiserfs_i.i_converted_page = new_pl ; + inode->u.reiserfs_i.i_conversion_trans_id = th->t_trans_id ; + return 0 ; +} + +/* +** long and ugly. If flush, will not return until all commit +** blocks and all real buffers in the trans are on disk. +** If no_async, won't return until all commit blocks are on disk. 
+** +** keep reading, there are comments as you go along +*/ +static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks, + int flags) { + struct reiserfs_journal_cnode *cn, *next, *jl_cn; + struct reiserfs_journal_cnode *last_cn = NULL; + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + struct buffer_head *c_bh ; /* commit bh */ + struct buffer_head *d_bh ; /* desc bh */ + int cur_write_start = 0 ; /* start index of current log write */ + int cur_blocks_left = 0 ; /* number of journal blocks left to write */ + int old_start ; + int i ; + int jindex ; + int orig_jindex ; + int flush = flags & FLUSH_ALL ; + int commit_now = flags & COMMIT_NOW ; + int wait_on_commit = flags & WAIT ; + struct reiserfs_super_block *rs ; + int windex = push_journal_writer("do_journal_end") ; + + if (reiserfs_dont_log(th->t_super)) { + pop_journal_writer(windex) ; + return 0 ; + } + + /* we must make sure all end_io tasks are done before we start marking + ** buffers as BH_JDirty_wait. Otherwise, they could be released and + ** cleaned before they properly get to disk + ** + ** we need the semaphore because other filesystems or journal writers + ** could be running the queue at the same time. We have to make sure the + ** queue is *empty*, not just being worked on. 
+ */ + down(&reiserfs_end_io_sem) ; + run_task_queue(&reiserfs_end_io_tq) ; + up(&reiserfs_end_io_sem) ; + + + lock_journal(p_s_sb) ; + if (SB_JOURNAL(p_s_sb)->j_next_full_flush) { + flags |= FLUSH_ALL ; + flush = 1 ; + } + if (SB_JOURNAL(p_s_sb)->j_next_async_flush) { + flags |= COMMIT_NOW ; + commit_now = 1 ; + } + + /* check_journal_end locks the journal, and unlocks if it does not return 1 + ** it tells us if we should continue with the journal_end, or just return + */ + if (!check_journal_end(th, p_s_sb, nblocks, flags)) { + pop_journal_writer(windex) ; + return 0 ; + } + + /* check_journal_end might set these, check again */ + if (SB_JOURNAL(p_s_sb)->j_next_full_flush) { + flush = 1 ; + } + if (SB_JOURNAL(p_s_sb)->j_next_async_flush) { + commit_now = 1 ; + } + /* + ** j must wait means we have to flush the log blocks, and the real blocks for + ** this transaction + */ + if (SB_JOURNAL(p_s_sb)->j_must_wait > 0) { + flush = 1 ; + } + + rs = SB_DISK_SUPER_BLOCK(p_s_sb) ; + /* setup description block */ + d_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + SB_JOURNAL(p_s_sb)->j_start, p_s_sb->s_blocksize) ; + mark_buffer_uptodate(d_bh, 1) ; + desc = (struct reiserfs_journal_desc *)(d_bh)->b_data ; + memset(desc, 0, sizeof(struct reiserfs_journal_desc)) ; + memcpy(desc->j_magic, JOURNAL_DESC_MAGIC, 8) ; + desc->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + + /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ + c_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + + ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL(p_s_sb)->j_len + 1) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + d_bh->b_end_io = reiserfs_journal_end_io ; + c_bh->b_end_io = reiserfs_journal_end_io ; + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + memset(commit, 0, sizeof(struct reiserfs_journal_commit)) ; + commit->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + mark_buffer_uptodate(c_bh, 1) ; + + /* init this journal list */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_older_commits_done), 0) ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_bh = c_bh ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_start = SB_JOURNAL(p_s_sb)->j_start ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len = SB_JOURNAL(p_s_sb)->j_len ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_nonzerolen), SB_JOURNAL(p_s_sb)->j_len) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_left), SB_JOURNAL(p_s_sb)->j_len + 2); + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = NULL ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; + + /* which is faster, locking/unlocking at the start and end of the for + ** or locking once per iteration around the insert_journal_hash? + ** eitherway, we are write locking insert_journal_hash. The ENTIRE FOR + ** LOOP MUST not cause schedule to occur. 
+ */ + + /* for each real block, add it to the journal list hash, + ** copy into real block index array in the commit or desc block + */ + for (i = 0, cn = SB_JOURNAL(p_s_sb)->j_first ; cn ; cn = cn->next, i++) { + if (test_bit(BH_JDirty, &cn->bh->b_state) ) { + jl_cn = get_cnode(p_s_sb) ; + if (!jl_cn) { + reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; + } + if (i == 0) { + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = jl_cn ; + } + jl_cn->prev = last_cn ; + jl_cn->next = NULL ; + if (last_cn) { + last_cn->next = jl_cn ; + } + last_cn = jl_cn ; + if (cn->bh->b_blocknr >= reiserfs_get_journal_block(p_s_sb) && + cn->bh->b_blocknr < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT)) { + reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ; + } + jl_cn->blocknr = cn->bh->b_blocknr ; + jl_cn->state = 0 ; + jl_cn->dev = cn->bh->b_dev ; + jl_cn->bh = cn->bh ; + jl_cn->jlist = SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb) ; + insert_journal_hash(SB_JOURNAL(p_s_sb)->j_list_hash_table, jl_cn) ; + if (i < JOURNAL_TRANS_HALF) { + desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; + } else { + commit->j_realblock[i - JOURNAL_TRANS_HALF] = cpu_to_le32(cn->bh->b_blocknr) ; + } + } else { + i-- ; + } + } + + desc->j_len = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_len) ; + desc->j_mount_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_mount_id) ; + desc->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + commit->j_len = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_len) ; + + /* special check in case all buffers in the journal were marked for not logging */ + if (SB_JOURNAL(p_s_sb)->j_len == 0) { + brelse(d_bh) ; + brelse(c_bh) ; + unlock_journal(p_s_sb) ; +printk("journal-2020: do_journal_end: BAD desc->j_len is ZERO\n") ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + pop_journal_writer(windex) ; + return 0 ; + } + + /* first 
data block is j_start + 1, so add one to cur_write_start wherever you use it */ + cur_write_start = SB_JOURNAL(p_s_sb)->j_start ; + cur_blocks_left = SB_JOURNAL(p_s_sb)->j_len ; + cn = SB_JOURNAL(p_s_sb)->j_first ; + jindex = 1 ; /* start at one so we don't get the desc again */ + while(cur_blocks_left > 0) { + /* copy all the real blocks into log area. dirty log blocks */ + if (test_bit(BH_JDirty, &cn->bh->b_state)) { + struct buffer_head *tmp_bh ; + tmp_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + + ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + tmp_bh->b_end_io = reiserfs_journal_end_io ; + mark_buffer_uptodate(tmp_bh, 1) ; + memcpy(tmp_bh->b_data, cn->bh->b_data, cn->bh->b_size) ; + jindex++ ; + } else { + /* JDirty cleared sometime during transaction. don't log this one */ + printk("journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ; + } + cn = cn->next ; + cur_blocks_left-- ; + } + + /* we are done with both the c_bh and d_bh, but + ** c_bh must be written after all other commit blocks, + ** so we dirty/relse c_bh in journal_end_io, with commit_left <= 1. + */ + + /* now loop through and mark all buffers from this transaction as JDirty_wait + ** clear the JDirty bit, clear BH_JNew too. 
+ ** if they weren't JDirty, they weren't logged, just relse them and move on + */ + cn = SB_JOURNAL(p_s_sb)->j_first ; + while(cn) { + clear_bit(BH_JNew, &(cn->bh->b_state)) ; + if (test_bit(BH_JDirty, &(cn->bh->b_state))) { + set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ; + clear_bit(BH_JDirty, &(cn->bh->b_state)) ; + } else { + brelse(cn->bh) ; + } + next = cn->next ; + free_cnode(p_s_sb, cn) ; + cn = next ; + } + + /* unlock the journal list for committing and flushing */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 0) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 0) ; + + orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; + SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ; + + /* make sure to flush any data converted from direct items to + ** indirect items before allowing the commit blocks to reach the + ** disk + */ + flush_pages_before_commit(th, p_s_sb) ; + + /* honor the flush and async wishes from the caller */ + if (flush) { + + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 0, 1) ; /* flush all */ + } else if (commit_now) { + if (wait_on_commit) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + } else { + commit_flush_async(p_s_sb, orig_jindex) ; + } + } + + /* reset journal values for the next transaction */ + old_start = SB_JOURNAL(p_s_sb)->j_start ; + SB_JOURNAL(p_s_sb)->j_start = (SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL(p_s_sb)->j_len + 2) % JOURNAL_BLOCK_COUNT; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wcount), 0) ; + SB_JOURNAL(p_s_sb)->j_bcount = 0 ; + SB_JOURNAL(p_s_sb)->j_last = NULL ; + SB_JOURNAL(p_s_sb)->j_first = NULL ; + SB_JOURNAL(p_s_sb)->j_len = 0 ; + SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ; + SB_JOURNAL(p_s_sb)->j_trans_id++ ; + SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; + 
SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ; + SB_JOURNAL(p_s_sb)->j_next_full_flush = 0 ; + SB_JOURNAL(p_s_sb)->j_next_async_flush = 0 ; + init_journal_hash(p_s_sb) ; + + /* if the next transaction has any chance of wrapping, flush + ** transactions that might get overwritten. If any journal lists are very + ** old flush them as well. Since data will get to disk every 30 seconds or + ** so, any list that has unflushed members after 2 minutes was a victim to + ** memory shortages during the end_io handler. Clean things up for them + ** + */ + for (i =0 ; i < JOURNAL_LIST_COUNT ; i++) { + jindex = i ; + if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && SB_JOURNAL(p_s_sb)->j_start <= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + if ((SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) >= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 0, 1) ; /* do flush all */ + } + } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && + (SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) > JOURNAL_BLOCK_COUNT) { + if (((SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) % JOURNAL_BLOCK_COUNT) >= + SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 0,1 ) ; /* do flush all */ + } + } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && + SB_JOURNAL_LIST(p_s_sb)[jindex].j_timestamp < + (CURRENT_TIME - (JOURNAL_MAX_TRANS_AGE * 4))) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 0,1 ) ; + } + } + + /* if the next journal_list is still in use, flush it */ + if (SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len != 0) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb), 0, 1) ; /* do flush all */ + } + + /* we don't want anyone flushing the new transaction's list */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; + 
atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + + SB_JOURNAL_LIST_INDEX(p_s_sb)) ; + + if (!(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap)) { + reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; + } + unlock_journal(p_s_sb) ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; + /* wake up any body waiting to join. */ + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + pop_journal_writer(windex) ; + return 0 ; +} + + + diff -u -r --new-file linux/fs/reiserfs/lbalance.c v2.4.0-test8/linux/fs/reiserfs/lbalance.c --- linux/fs/reiserfs/lbalance.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/lbalance.c Sun May 21 17:26:43 2000 @@ -0,0 +1,1325 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + +/* these are used in do_balance.c */ + +/* leaf_move_items + leaf_shift_left + leaf_shift_right + leaf_delete_items + leaf_insert_into_buf + leaf_paste_in_buffer + leaf_cut_from_buffer + leaf_paste_entries + */ + + +/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ +static void leaf_copy_dir_entries (struct buffer_info * dest_bi, struct buffer_head * source, + int last_first, int item_num, int from, int copy_count) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int item_num_in_dest; /* either the number of target item, + or if we must create a new item, + the number of the item we will + create it next to */ + struct item_head * ih; + struct reiserfs_de_head * deh; + int copy_records_len; /* length of all records in item to be copied */ + char * records; + + ih 
= B_N_PITEM_HEAD (source, item_num); + +#ifdef CONFIG_REISERFS_CHECK + if (!is_direntry_le_ih (ih)) + reiserfs_panic(0, "vs-10000: leaf_copy_dir_entries: item must be directory item"); +#endif + + /* length of all record to be copied and first byte of the last of them */ + deh = B_I_DEH (source, ih); + if (copy_count) { + copy_records_len = (from ? deh[from - 1].deh_location : ih->ih_item_len) - + deh[from + copy_count - 1].deh_location; + records = source->b_data + ih->ih_item_location + deh[from + copy_count - 1].deh_location; + } else { + copy_records_len = 0; + records = 0; + } + + /* when copy last to first, dest buffer can contain 0 items */ + item_num_in_dest = (last_first == LAST_TO_FIRST) ? (( B_NR_ITEMS(dest) ) ? 0 : -1) : (B_NR_ITEMS(dest) - 1); + + /* if there are no items in dest or the first/last item in dest is not item of the same directory */ + if ( (item_num_in_dest == - 1) || +#ifdef REISERFS_FSCK + (last_first == FIRST_TO_LAST && are_items_mergeable (B_N_PITEM_HEAD (dest, item_num_in_dest), ih, dest->b_size) == 0) || + (last_first == LAST_TO_FIRST && are_items_mergeable (ih, B_N_PITEM_HEAD (dest, item_num_in_dest), dest->b_size) == 0)) { +#else + (last_first == FIRST_TO_LAST && le_key_k_offset (ih_version (ih), &(ih->ih_key)) == DOT_OFFSET) || + (last_first == LAST_TO_FIRST && comp_short_le_keys/*COMP_SHORT_KEYS*/ (&ih->ih_key, B_N_PKEY (dest, item_num_in_dest)))) { +#endif + /* create new item in dest */ + struct item_head new_ih; + + /* form item header */ + memcpy (&new_ih.ih_key, &ih->ih_key, KEY_SIZE); + new_ih.ih_version = cpu_to_le16 (ITEM_VERSION_1); + /* calculate item len */ + new_ih.ih_item_len = cpu_to_le16 (DEH_SIZE * copy_count + copy_records_len); + I_ENTRY_COUNT(&new_ih) = 0; + + if (last_first == LAST_TO_FIRST) { + /* form key by the following way */ + if (from < I_ENTRY_COUNT(ih)) { + set_le_ih_k_offset (&new_ih, cpu_to_le32 (le32_to_cpu (deh[from].deh_offset))); + /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, 
SHORT_KEY_SIZE);*/ + } else { + /* no entries will be copied to this item in this function */ + set_le_ih_k_offset (&new_ih, cpu_to_le32 (U32_MAX)); + /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ + } + set_le_key_k_type (ITEM_VERSION_1, &(new_ih.ih_key), TYPE_DIRENTRY); + } + + /* insert item into dest buffer */ + leaf_insert_into_buf (dest_bi, (last_first == LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), &new_ih, NULL, 0); + } else { + /* prepare space for entries */ + leaf_paste_in_buffer (dest_bi, (last_first==FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0, MAX_US_INT, + DEH_SIZE * copy_count + copy_records_len, records, 0 + ); + } + + item_num_in_dest = (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest)-1) : 0; + + leaf_paste_entries (dest_bi->bi_bh, item_num_in_dest, + (last_first == FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD (dest, item_num_in_dest)) : 0, + copy_count, deh + from, records, + DEH_SIZE * copy_count + copy_records_len + ); +} + + +/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or + part of it or nothing (see the return 0 below) from SOURCE to the end + (if last_first) or beginning (!last_first) of the DEST */ +/* returns 1 if anything was copied, else 0 */ +static int leaf_copy_boundary_item (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int bytes_or_entries) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ + struct item_head * ih; + struct item_head * dih; + + dest_nr_item = B_NR_ITEMS(dest); + + if ( last_first == FIRST_TO_LAST ) { + /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects + or of different types ) then there is no need to treat this item differently from the other items + that we copy, so we return */ + ih = B_N_PITEM_HEAD (src, 0); + dih = B_N_PITEM_HEAD (dest, dest_nr_item 
- 1); +#ifdef REISERFS_FSCK + if (!dest_nr_item || (are_items_mergeable (dih, ih, src->b_size) == 0)) +#else + if (!dest_nr_item || (!op_is_left_mergeable (&(ih->ih_key), src->b_size))) +#endif + /* there is nothing to merge */ + return 0; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! ih->ih_item_len ) + reiserfs_panic (0, "vs-10010: leaf_copy_boundary_item: item can not have empty dynamic length"); +#endif + + if ( is_direntry_le_ih (ih) ) { + if ( bytes_or_entries == -1 ) + /* copy all entries to dest */ + bytes_or_entries = le16_to_cpu (ih->u.ih_entry_count); + leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, 0, 0, bytes_or_entries); + return 1; + } + + /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST + part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header + */ + if ( bytes_or_entries == -1 ) + bytes_or_entries = le16_to_cpu (ih->ih_item_len); + +#ifdef CONFIG_REISERFS_CHECK + else { + if (bytes_or_entries == le16_to_cpu (ih->ih_item_len) && is_indirect_le_ih(ih)) + if (get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10020: leaf_copy_boundary_item: " + "last unformatted node must be filled entirely (%h)", + ih); + } +#endif + + /* merge first item (or its part) of src buffer with the last + item of dest buffer. 
Both are of the same file */ + leaf_paste_in_buffer (dest_bi, + dest_nr_item - 1, dih->ih_item_len, bytes_or_entries, B_I_PITEM(src,ih), 0 + ); + + if (is_indirect_le_ih (dih)) { +#ifdef CONFIG_REISERFS_CHECK + if (get_ih_free_space (dih)) + reiserfs_panic (0, "vs-10030: leaf_copy_boundary_item: " + "merge to left: last unformatted node of non-last indirect item %h must have zerto free space", + ih); +#endif + if (bytes_or_entries == le16_to_cpu (ih->ih_item_len)) + set_ih_free_space (dih, get_ih_free_space (ih)); + } + + return 1; + } + + + /* copy boundary item to right (last_first == LAST_TO_FIRST) */ + + /* ( DEST is empty or last item of SOURCE and first item of DEST + are the items of different object or of different types ) + */ + src_nr_item = B_NR_ITEMS (src); + ih = B_N_PITEM_HEAD (src, src_nr_item - 1); + dih = B_N_PITEM_HEAD (dest, 0); + +#ifdef REISERFS_FSCK + if (!dest_nr_item || are_items_mergeable (ih, dih, src->b_size) == 0) +#else + if (!dest_nr_item || !op_is_left_mergeable (&(dih->ih_key), src->b_size)) +#endif + return 0; + + if ( is_direntry_le_ih (ih)) { + if ( bytes_or_entries == -1 ) + /* bytes_or_entries = entries number in last item body of SOURCE */ + bytes_or_entries = le16_to_cpu (ih->u.ih_entry_count); + + leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, src_nr_item - 1, le16_to_cpu (ih->u.ih_entry_count) - bytes_or_entries, bytes_or_entries); + return 1; + } + + /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; + part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; + don't create new item header + */ + +#ifdef CONFIG_REISERFS_CHECK + if (is_indirect_le_ih(ih) && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10040: leaf_copy_boundary_item: " + "merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", + ih); +#endif + + if ( bytes_or_entries == -1 ) { + /* bytes_or_entries = 
length of last item body of SOURCE */ + bytes_or_entries = ih->ih_item_len; + +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) != le_ih_k_offset (ih) + op_bytes_number (ih, src->b_size)) + reiserfs_panic (0, "vs-10050: leaf_copy_boundary_item: items %h and %h do not match", ih, dih); +#endif + + /* change first item key of the DEST */ + set_le_ih_k_offset (dih, le_ih_k_offset (ih)); + + /* item becomes non-mergeable */ + /* or mergeable if left item was */ + set_le_ih_k_type (dih, le_ih_k_type (ih)); + } else { + /* merge to right only part of item */ +#ifdef CONFIG_REISERFS_CHECK + if ( le16_to_cpu (ih->ih_item_len) <= bytes_or_entries ) + reiserfs_panic (0, "vs-10060: leaf_copy_boundary_item: no so much bytes %lu (needed %lu)", + ih->ih_item_len, bytes_or_entries); +#endif + + /* change first item key of the DEST */ + if ( is_direct_le_ih (dih) ) { +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) <= (unsigned long)bytes_or_entries) + reiserfs_panic (0, "vs-10070: leaf_copy_boundary_item: dih %h, bytes_or_entries(%d)", + dih, bytes_or_entries); +#endif + set_le_ih_k_offset (dih, le_ih_k_offset (dih) - bytes_or_entries); + } else { +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) <= (bytes_or_entries / UNFM_P_SIZE) * dest->b_size ) + reiserfs_panic (0, "vs-10080: leaf_copy_boundary_item: dih %h, bytes_or_entries(%d)", + dih, (bytes_or_entries/UNFM_P_SIZE)*dest->b_size); +#endif + set_le_ih_k_offset (dih, le_ih_k_offset (dih) - ((bytes_or_entries / UNFM_P_SIZE) * dest->b_size)); + } + } + + leaf_paste_in_buffer (dest_bi, 0, 0, bytes_or_entries, B_I_PITEM(src,ih) + ih->ih_item_len - bytes_or_entries, 0); + return 1; +} + + +/* copy cpy_mun items from buffer src to buffer dest + * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest + * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest + */ +static void 
leaf_copy_items_entirely (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int first, int cpy_num) +{ + struct buffer_head * dest; + int nr; + int dest_before; + int last_loc, last_inserted_loc, location; + int i, j; + struct block_head * blkh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST) + reiserfs_panic (0, "vs-10090: leaf_copy_items_entirely: bad last_first parameter %d", last_first); + + if (B_NR_ITEMS (src) - first < cpy_num) + reiserfs_panic (0, "vs-10100: leaf_copy_items_entirely: too few items in source %d, required %d from %d", + B_NR_ITEMS(src), cpy_num, first); + + if (cpy_num < 0) + reiserfs_panic (0, "vs-10110: leaf_copy_items_entirely: can not copy negative amount of items"); + + if ( ! dest_bi ) + reiserfs_panic (0, "vs-10120: leaf_copy_items_entirely: can not copy negative amount of items"); +#endif + + dest = dest_bi->bi_bh; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! dest ) + reiserfs_panic (0, "vs-10130: leaf_copy_items_entirely: can not copy negative amount of items"); +#endif + + if (cpy_num == 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(dest))->blk_nr_item); + + /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ + dest_before = (last_first == LAST_TO_FIRST) ? 
0 : nr; + + /* location of head of first new item */ + ih = B_N_PITEM_HEAD (dest, dest_before); + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (blkh->blk_free_space) < cpy_num * IH_SIZE) { + reiserfs_panic (0, "vs-10140: leaf_copy_items_entirely: " + "not enough free space for headers %d (needed %d)", + B_FREE_SPACE (dest), cpy_num * IH_SIZE); + } +#endif + + /* prepare space for headers */ + memmove (ih + cpy_num, ih, (nr-dest_before) * IH_SIZE); + + /* copy item headers */ + memcpy (ih, B_N_PITEM_HEAD (src, first), cpy_num * IH_SIZE); + + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - IH_SIZE * cpy_num); + + /* location of unmovable item */ + j = location = (dest_before == 0) ? dest->b_size : (ih-1)->ih_item_location; + for (i = dest_before; i < nr + cpy_num; i ++) + ih[i-dest_before].ih_item_location = + (location -= ih[i-dest_before].ih_item_len); + + /* prepare space for items */ + last_loc = ih[nr+cpy_num-1-dest_before].ih_item_location; + last_inserted_loc = ih[cpy_num-1].ih_item_location; + + /* check free space */ +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (blkh->blk_free_space) < j - last_inserted_loc) { + reiserfs_panic (0, "vs-10150: leaf_copy_items_entirely: not enough free space for items %d (needed %d)", + le16_to_cpu (blkh->blk_free_space), j - last_inserted_loc); + } +#endif + + memmove (dest->b_data + last_loc, + dest->b_data + last_loc + j - last_inserted_loc, + last_inserted_loc - last_loc); + + /* copy items */ + memcpy (dest->b_data + last_inserted_loc, B_N_PITEM(src,(first + cpy_num - 1)), + j - last_inserted_loc); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + cpy_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - (j - last_inserted_loc)); + + do_balance_mark_leaf_dirty (dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { +#ifdef CONFIG_REISERFS_CHECK + if (B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_block_number 
!= dest->b_blocknr) { + reiserfs_panic (0, "vs-10160: leaf_copy_items_entirely: " + "block number in bh does not match to field in disk_child structure %lu and %lu", + dest->b_blocknr, B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_block_number); + } +#endif + B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_size += + j - last_inserted_loc + IH_SIZE * cpy_num; + + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent, 0); + } +} + + +/* This function splits the (liquid) item into two items (useful when + shifting part of an item into another node.) */ +static void leaf_item_bottle (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int item_num, int cpy_bytes) +{ + struct buffer_head * dest = dest_bi->bi_bh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( cpy_bytes == -1 ) + reiserfs_panic (0, "vs-10170: leaf_item_bottle: bytes == - 1 means: do not split item"); +#endif + + if ( last_first == FIRST_TO_LAST ) { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(src,item_num))) + leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, item_num, 0, cpy_bytes); + else { + struct item_head n_ih; + + /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST + part defined by 'cpy_bytes'; create new item header; change old item_header (????); + n_ih = new item_header; + */ + memcpy (&n_ih, ih, IH_SIZE); + n_ih.ih_item_len = cpu_to_le16 (cpy_bytes); + if (is_indirect_le_ih (ih)) { +#ifdef CONFIG_REISERFS_CHECK + if (cpy_bytes == le16_to_cpu (ih->ih_item_len) && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10180: leaf_item_bottle: " + "when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", + get_ih_free_space (ih)); +#endif + set_ih_free_space (&n_ih, 0); + } + +#ifdef CONFIG_REISERFS_CHECK + if (op_is_left_mergeable (&(ih->ih_key), src->b_size)) + reiserfs_panic (0, 
"vs-10190: leaf_item_bottle: bad mergeability of item %h", ih); +#endif + n_ih.ih_version = ih->ih_version;; + leaf_insert_into_buf (dest_bi, B_NR_ITEMS(dest), &n_ih, B_N_PITEM (src, item_num), 0); + } + } else { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih(ih = B_N_PITEM_HEAD (src, item_num))) + leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, item_num, I_ENTRY_COUNT(ih) - cpy_bytes, cpy_bytes); + else { + struct item_head n_ih; + + /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST + part defined by 'cpy_bytes'; create new item header; + n_ih = new item_header; + */ + memcpy (&n_ih, ih, SHORT_KEY_SIZE); + n_ih.ih_version = cpu_to_le16 (ih_version (ih)); + if (is_direct_le_ih (ih)) { + set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + le16_to_cpu (ih->ih_item_len) - cpy_bytes); + set_le_ih_k_type (&n_ih, TYPE_DIRECT); + set_ih_free_space (&n_ih, MAX_US_INT); + } else { + /* indirect item */ +#ifdef CONFIG_REISERFS_CHECK + if (!cpy_bytes && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10200: leaf_item_bottle: ih->ih_free_space must be 0 when indirect item will be appended"); +#endif + set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + (le16_to_cpu (ih->ih_item_len) - cpy_bytes) / UNFM_P_SIZE * dest->b_size); + set_le_ih_k_type (&n_ih, TYPE_INDIRECT); + set_ih_free_space (&n_ih, get_ih_free_space (ih)); + } + + /* set item length */ + n_ih.ih_item_len = cpu_to_le16 (cpy_bytes); + n_ih.ih_version = cpu_to_le16 (le16_to_cpu (ih->ih_version)); + leaf_insert_into_buf (dest_bi, 0, &n_ih, B_N_PITEM(src,item_num) + le16_to_cpu (ih->ih_item_len) - cpy_bytes, 0); + } + } +} + + +/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. + If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. + From last item copy cpy_num bytes for regular item and cpy_num directory entries for + directory item. 
*/ +static int leaf_copy_items (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, int cpy_num, + int cpy_bytes) +{ + struct buffer_head * dest; + int pos, i, src_nr_item, bytes; + + dest = dest_bi->bi_bh; +#ifdef CONFIG_REISERFS_CHECK + if (!dest || !src) + reiserfs_panic (0, "vs-10210: leaf_copy_items: !dest || !src"); + + if ( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST ) + reiserfs_panic (0, "vs-10220: leaf_copy_items: last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); + + if ( B_NR_ITEMS(src) < cpy_num ) + reiserfs_panic (0, "vs-10230: leaf_copy_items: No enough items: %d, required %d", B_NR_ITEMS(src), cpy_num); + + if ( cpy_num < 0 ) + reiserfs_panic (0, "vs-10240: leaf_copy_items: cpy_num < 0 (%d)", cpy_num); +#endif + + if ( cpy_num == 0 ) + return 0; + + if ( last_first == FIRST_TO_LAST ) { + /* copy items to left */ + pos = 0; + if ( cpy_num == 1 ) + bytes = cpy_bytes; + else + bytes = -1; + + /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ + i = leaf_copy_boundary_item (dest_bi, src, FIRST_TO_LAST, bytes); + cpy_num -= i; + if ( cpy_num == 0 ) + return i; + pos += i; + if ( cpy_bytes == -1 ) + /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ + leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num); + else { + /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ + leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num-1); + + /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ + leaf_item_bottle (dest_bi, src, FIRST_TO_LAST, cpy_num+pos-1, cpy_bytes); + } + } else { + /* copy items to right */ + src_nr_item = B_NR_ITEMS (src); + if ( cpy_num == 1 ) + bytes = cpy_bytes; + else + bytes = -1; + + /* copy the last item or it part or nothing to the begin of the DEST (i = 
leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ + i = leaf_copy_boundary_item (dest_bi, src, LAST_TO_FIRST, bytes); + + cpy_num -= i; + if ( cpy_num == 0 ) + return i; + + pos = src_nr_item - cpy_num - i; + if ( cpy_bytes == -1 ) { + /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ + leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos, cpy_num); + } else { + /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ + leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos+1, cpy_num-1); + + /* copy part of the item which number is pos to the begin of the DEST */ + leaf_item_bottle (dest_bi, src, LAST_TO_FIRST, pos, cpy_bytes); + } + } + return i; +} + + +/* there are types of coping: from S[0] to L[0], from S[0] to R[0], + from R[0] to L[0]. for each of these we have to define parent and + positions of destination and source buffers */ +static void leaf_define_dest_src_infos (int shift_mode, struct tree_balance * tb, struct buffer_info * dest_bi, + struct buffer_info * src_bi, int * first_last, + struct buffer_head * Snew) +{ +#ifdef CONFIG_REISERFS_CHECK + memset (dest_bi, 0, sizeof (struct buffer_info)); + memset (src_bi, 0, sizeof (struct buffer_info)); +#endif + + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); /* src->b_item_order */ + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position (tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 
0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position (tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[0]; + src_bi->bi_parent = tb->FR[0]; + src_bi->bi_position = get_right_neighbor_position (tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position (tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->L[0]; + src_bi->bi_parent = tb->FL[0]; + src_bi->bi_position = get_left_neighbor_position (tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position (tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_S_TO_SNEW: + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = Snew; + dest_bi->bi_parent = 0; + dest_bi->bi_position = 0; + *first_last = LAST_TO_FIRST; + break; + + default: + reiserfs_panic (0, "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", shift_mode); + } +#ifdef CONFIG_REISERFS_CHECK + if (src_bi->bi_bh == 0 || dest_bi->bi_bh == 0) { + reiserfs_panic (0, "vs-10260: leaf_define_dest_src_etc: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", + shift_mode, src_bi->bi_bh, dest_bi->bi_bh); + } +#endif +} + + + + +/* copy mov_num items and mov_bytes of the (mov_num-1)th item to + neighbor. 
Delete them from source */ +int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew) +{ + int ret_value; + struct buffer_info dest_bi, src_bi; + int first_last; + + leaf_define_dest_src_infos (shift_mode, tb, &dest_bi, &src_bi, &first_last, Snew); + + ret_value = leaf_copy_items (&dest_bi, src_bi.bi_bh, first_last, mov_num, mov_bytes); + + leaf_delete_items (&src_bi, first_last, (first_last == FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) - mov_num), mov_num, mov_bytes); + + + return ret_value; +} + + +/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) + from S[0] to L[0] and replace the delimiting key */ +int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes) +{ + struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); + int i; + + /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ + i = leaf_move_items (LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, 0); + + if ( shift_num ) { + if (B_NR_ITEMS (S0) == 0) { /* number of items in S[0] == 0 */ + +#ifdef CONFIG_REISERFS_CHECK + if ( shift_bytes != -1 ) + reiserfs_panic (tb->tb_sb, "vs-10270: leaf_shift_left: S0 is empty now, but shift_bytes != -1 (%d)", shift_bytes); + + if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { + print_cur_tb ("vs-10275"); + reiserfs_panic (tb->tb_sb, "vs-10275: leaf_shift_left: balance condition corrupted (%c)", tb->tb_mode); + } +#endif + + if (PATH_H_POSITION (tb->tb_path, 1) == 0) + replace_key (tb, tb->CFL[0], tb->lkey[0], PATH_H_PPARENT (tb->tb_path, 0), 0); + +#if 0 + /* change right_delimiting_key field in L0's block header */ + copy_key (B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY (S0)); +#endif + } else { + /* replace lkey in CFL[0] by 0-th key from S[0]; */ + replace_key (tb, tb->CFL[0], tb->lkey[0], S0, 0); + +#if 0 + /* change right_delimiting_key field in L0's block header */ + copy_key 
(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY (S0, 0)); +#endif +#ifdef CONFIG_REISERFS_CHECK + if (shift_bytes != -1 && !(is_direntry_le_ih (B_N_PITEM_HEAD (S0, 0)) + && !I_ENTRY_COUNT (B_N_PITEM_HEAD (S0, 0)))) { + if (!op_is_left_mergeable (B_N_PKEY (S0, 0), S0->b_size)) { + reiserfs_panic (tb->tb_sb, "vs-10280: leaf_shift_left: item must be mergeable"); + } + } +#endif + } + } + + return i; +} + + + + + +/* CLEANING STOPPED HERE */ + + + + +/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ +int leaf_shift_right( + struct tree_balance * tb, + int shift_num, + int shift_bytes + ) +{ + // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); + int ret_value; + + /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ + ret_value = leaf_move_items (LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, 0); + + /* replace rkey in CFR[0] by the 0-th key from R[0] */ + if (shift_num) { + replace_key (tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); + +#if 0 + /* change right_delimiting_key field in S0's block header */ + copy_key (B_PRIGHT_DELIM_KEY(S0), B_N_PKEY (tb->R[0], 0)); +#endif + } + + return ret_value; +} + + + +static void leaf_delete_items_entirely (struct buffer_info * bi, + int first, int del_num); +/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. + If not. + If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of + the first item. Part defined by del_bytes. Don't delete first item header + If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of + the last item . Part defined by del_bytes. Don't delete last item header. 
+*/ +void leaf_delete_items (struct buffer_info * cur_bi, int last_first, + int first, int del_num, int del_bytes) +{ + struct buffer_head * bh; + int item_amount = B_NR_ITEMS (bh = cur_bi->bi_bh); + +#ifdef CONFIG_REISERFS_CHECK + if ( !bh ) + reiserfs_panic (0, "leaf_delete_items: 10155: bh is not defined"); + + if ( del_num < 0 ) + reiserfs_panic (0, "leaf_delete_items: 10160: del_num can not be < 0. del_num==%d", del_num); + + if ( first < 0 || first + del_num > item_amount ) + reiserfs_panic (0, "leaf_delete_items: 10165: invalid number of first item to be deleted (%d) or " + "no so much items (%d) to delete (only %d)", first, first + del_num, item_amount); +#endif + + if ( del_num == 0 ) + return; + + if ( first == 0 && del_num == item_amount && del_bytes == -1 ) { + make_empty_node (cur_bi); + do_balance_mark_leaf_dirty (cur_bi->tb, bh, 0); + return; + } + + if ( del_bytes == -1 ) + /* delete del_num items beginning from item in position first */ + leaf_delete_items_entirely (cur_bi, first, del_num); + else { + if ( last_first == FIRST_TO_LAST ) { + /* delete del_num-1 items beginning from item in position first */ + leaf_delete_items_entirely (cur_bi, first, del_num-1); + + /* delete the part of the first item of the bh + do not delete item header + */ + leaf_cut_from_buffer (cur_bi, 0, 0, del_bytes); + } else { + struct item_head * ih; + int len; + + /* delete del_num-1 items beginning from item in position first+1 */ + leaf_delete_items_entirely (cur_bi, first+1, del_num-1); + + if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh)-1))) /* the last item is directory */ + /* len = numbers of directory entries in this item */ + len = le16_to_cpu (ih->u.ih_entry_count); + else + /* len = body len of item */ + len = le16_to_cpu (ih->ih_item_len); + + /* delete the part of the last item of the bh + do not delete item header + */ + leaf_cut_from_buffer (cur_bi, B_NR_ITEMS(bh)-1, len - del_bytes, del_bytes); + } + } +} + + +/* insert item into the leaf 
node in position before */ +void leaf_insert_into_buf (struct buffer_info * bi, int before, + struct item_head * inserted_item_ih, + const char * inserted_item_body, + int zeros_number) +{ + struct buffer_head * bh = bi->bi_bh; + int nr; + struct block_head * blkh; + struct item_head * ih; + int i; + int last_loc, unmoved_loc; + char * to; + + + nr = le16_to_cpu ((blkh = B_BLK_HEAD (bh))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + /* check free space */ + if (le16_to_cpu (blkh->blk_free_space) < le16_to_cpu (inserted_item_ih->ih_item_len) + IH_SIZE) + reiserfs_panic (0, "leaf_insert_into_buf: 10170: " + "not enough free space in block %z, new item %h", + bh, inserted_item_ih); + if (zeros_number > inserted_item_ih->ih_item_len) + reiserfs_panic (0, "vs-10172: leaf_insert_into_buf: " + "zero number == %d, item length == %d", zeros_number, inserted_item_ih->ih_item_len); +#endif /* CONFIG_REISERFS_CHECK */ + + + /* get item new item must be inserted before */ + ih = B_N_PITEM_HEAD (bh, before); + + /* prepare space for the body of new item */ + last_loc = nr ? ih[nr - before - 1].ih_item_location : bh->b_size; + unmoved_loc = before ? 
(ih-1)->ih_item_location : bh->b_size; + + memmove (bh->b_data + last_loc - inserted_item_ih->ih_item_len, + bh->b_data + last_loc, unmoved_loc - last_loc); + + to = bh->b_data + unmoved_loc - inserted_item_ih->ih_item_len; + memset (to, 0, zeros_number); + to += zeros_number; + + /* copy body to prepared space */ + if (inserted_item_body) + memmove (to, inserted_item_body, inserted_item_ih->ih_item_len - zeros_number); + else + memset(to, '\0', inserted_item_ih->ih_item_len - zeros_number); + + /* insert item header */ + memmove (ih + 1, ih, IH_SIZE * (nr - before)); + memmove (ih, inserted_item_ih, IH_SIZE); + + /* change locations */ + for (i = before; i < nr + 1; i ++) + ih[i-before].ih_item_location = + (unmoved_loc -= ih[i-before].ih_item_len); + + /* sizes, free space, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + 1); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - + (IH_SIZE + inserted_item_ih->ih_item_len)); + + do_balance_mark_leaf_dirty (bi->tb, bh, 1); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size += (IH_SIZE + inserted_item_ih->ih_item_len); + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* paste paste_size bytes to affected_item_num-th item. 
+ When item is a directory, this only prepare space for new entries */ +void leaf_paste_in_buffer (struct buffer_info * bi, int affected_item_num, + int pos_in_item, int paste_size, + const char * body, + int zeros_number) +{ + struct buffer_head * bh = bi->bi_bh; + int nr; + struct block_head * blkh; + struct item_head * ih; + int i; + int last_loc, unmoved_loc; + + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(bh))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + /* check free space */ + if (le16_to_cpu (blkh->blk_free_space) < paste_size) + reiserfs_panic (0, "leaf_paste_in_buffer: 10175: not enough free space: needed %d, available %d", + paste_size, le16_to_cpu (blkh->blk_free_space)); + if (zeros_number > paste_size) { + print_cur_tb ("10177"); + reiserfs_panic (0, "vs-10177: leaf_paste_in_buffer: zero number == %d, paste_size == %d", + zeros_number, paste_size); + } +#endif /* CONFIG_REISERFS_CHECK */ + + + /* item to be appended */ + ih = B_N_PITEM_HEAD(bh, affected_item_num); + + last_loc = ih[nr - affected_item_num - 1].ih_item_location; + unmoved_loc = affected_item_num ? 
(ih-1)->ih_item_location : bh->b_size; + + /* prepare space */ + memmove (bh->b_data + last_loc - paste_size, bh->b_data + last_loc, + unmoved_loc - last_loc); + + + /* change locations */ + for (i = affected_item_num; i < nr; i ++) + ih[i-affected_item_num].ih_item_location -= paste_size; + + if ( body ) { + if (!is_direntry_le_ih (ih)) { + if (!pos_in_item) { + /* shift data to right */ + memmove (bh->b_data + ih->ih_item_location + paste_size, + bh->b_data + ih->ih_item_location, ih->ih_item_len); + /* paste data in the head of item */ + memset (bh->b_data + ih->ih_item_location, 0, zeros_number); + memcpy (bh->b_data + ih->ih_item_location + zeros_number, body, paste_size - zeros_number); + } else { + memset (bh->b_data + unmoved_loc - paste_size, 0, zeros_number); + memcpy (bh->b_data + unmoved_loc - paste_size + zeros_number, body, paste_size - zeros_number); + } + } + } + else + memset(bh->b_data + unmoved_loc - paste_size,'\0',paste_size); + + ih->ih_item_len += paste_size; + + /* change free space */ + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - paste_size); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size += paste_size; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item + does not have free space, so it moves DEHs and remaining records as + necessary. Return value is size of removed part of directory item + in bytes. 
*/ +static int leaf_cut_entries ( + struct buffer_head * bh, + struct item_head * ih, + int from, + int del_count + ) +{ + char * item; + struct reiserfs_de_head * deh; + int prev_record_offset; /* offset of record, that is (from-1)th */ + char * prev_record; /* */ + int cut_records_len; /* length of all removed records */ + int i; + + +#ifdef CONFIG_REISERFS_CHECK + /* make sure, that item is directory and there are enough entries to + remove */ + if (!is_direntry_le_ih (ih)) + reiserfs_panic (0, "leaf_cut_entries: 10180: item is not directory item"); + + if (I_ENTRY_COUNT(ih) < from + del_count) + reiserfs_panic (0, "leaf_cut_entries: 10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", + I_ENTRY_COUNT(ih), from, del_count); +#endif + + if (del_count == 0) + return 0; + + /* first byte of item */ + item = bh->b_data + ih->ih_item_location; + + /* entry head array */ + deh = B_I_DEH (bh, ih); + + /* first byte of remaining entries, those are BEFORE cut entries + (prev_record) and length of all removed records (cut_records_len) */ + prev_record_offset = (from ? 
deh[from - 1].deh_location : ih->ih_item_len); + cut_records_len = prev_record_offset/*from_record*/ - deh[from + del_count - 1].deh_location; + prev_record = item + prev_record_offset; + + + /* adjust locations of remaining entries */ + for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i --) + deh[i].deh_location -= (DEH_SIZE * del_count); + + for (i = 0; i < from; i ++) + deh[i].deh_location -= DEH_SIZE * del_count + cut_records_len; + + I_ENTRY_COUNT(ih) -= del_count; + + /* shift entry head array and entries those are AFTER removed entries */ + memmove ((char *)(deh + from), + deh + from + del_count, + prev_record - cut_records_len - (char *)(deh + from + del_count)); + + /* shift records, those are BEFORE removed entries */ + memmove (prev_record - cut_records_len - DEH_SIZE * del_count, + prev_record, item + ih->ih_item_len - prev_record); + + return DEH_SIZE * del_count + cut_records_len; +} + + +/* when cut item is part of regular file + pos_in_item - first byte that must be cut + cut_size - number of bytes to be cut beginning from pos_in_item + + when cut item is part of directory + pos_in_item - number of first deleted entry + cut_size - count of deleted entries + */ +void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, + int pos_in_item, int cut_size) +{ + int nr; + struct buffer_head * bh = bi->bi_bh; + struct block_head * blkh; + struct item_head * ih; + int last_loc, unmoved_loc; + int i; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD (bh))->blk_nr_item); + + /* item head of truncated item */ + ih = B_N_PITEM_HEAD (bh, cut_item_num); + + if (is_direntry_le_ih (ih)) { + /* first cut entry ()*/ + cut_size = leaf_cut_entries (bh, ih, pos_in_item, cut_size); + if (pos_in_item == 0) { + /* change key */ +#ifdef CONFIG_REISERFS_CHECK + if (cut_item_num) + reiserfs_panic (0, "leaf_cut_from_buffer: 10190: " + "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", cut_item_num); +#endif + /* change item key by key 
of first entry in the item */ + set_le_ih_k_offset (ih, le32_to_cpu (B_I_DEH (bh, ih)->deh_offset)); + /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE);*/ + } + } else { + /* item is direct or indirect */ +#ifdef CONFIG_REISERFS_CHECK + if (is_statdata_le_ih (ih)) + reiserfs_panic (0, "leaf_cut_from_buffer: 10195: item is stat data"); + + if (pos_in_item && pos_in_item + cut_size != le16_to_cpu (ih->ih_item_len) ) + reiserfs_panic (0, "cut_from_buf: 10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", + pos_in_item, cut_size, le16_to_cpu (ih->ih_item_len)); +#endif + + /* shift item body to left if cut is from the head of item */ + if (pos_in_item == 0) { + memmove (bh->b_data + le16_to_cpu (ih->ih_item_location), bh->b_data + le16_to_cpu (ih->ih_item_location) + cut_size, + le16_to_cpu (ih->ih_item_len) - cut_size); + + /* change key of item */ + if (is_direct_le_ih (ih)) + set_le_ih_k_offset (ih, le_ih_k_offset (ih) + cut_size); + else { + set_le_ih_k_offset (ih, le_ih_k_offset (ih) + (cut_size / UNFM_P_SIZE) * bh->b_size); +#ifdef CONFIG_REISERFS_CHECK + if ( le16_to_cpu (ih->ih_item_len) == cut_size && get_ih_free_space (ih) ) + reiserfs_panic (0, "leaf_cut_from_buf: 10205: invalid ih_free_space (%h)", ih); +#endif + } + } + } + + + /* location of the last item */ + last_loc = le16_to_cpu (ih[nr - cut_item_num - 1].ih_item_location); + + /* location of the item, which is remaining at the same place */ + unmoved_loc = cut_item_num ? 
le16_to_cpu ((ih-1)->ih_item_location) : bh->b_size; + + + /* shift */ + memmove (bh->b_data + last_loc + cut_size, bh->b_data + last_loc, + unmoved_loc - last_loc - cut_size); + + /* change item length */ +/* ih->ih_item_len -= cut_size;*/ + ih->ih_item_len = cpu_to_le16 (le16_to_cpu (ih->ih_item_len) - cut_size); + + if (is_indirect_le_ih (ih)) { + if (pos_in_item) + set_ih_free_space (ih, 0); + } + + /* change locations */ + for (i = cut_item_num; i < nr; i ++) +/* ih[i-cut_item_num].ih_item_location += cut_size;*/ + ih[i-cut_item_num].ih_item_location = + cpu_to_le16 (le16_to_cpu (ih[i-cut_item_num].ih_item_location) + cut_size); + + /* size, free space */ + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) + cut_size); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size -= cut_size; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* delete del_num items from buffer starting from the first'th item */ +static void leaf_delete_items_entirely (struct buffer_info * bi, + int first, int del_num) +{ + struct buffer_head * bh = bi->bi_bh; + int nr; + int i, j; + int last_loc, last_removed_loc; + struct block_head * blkh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (bh == NULL) + reiserfs_panic (0, "leaf_delete_items_entirely: 10210: buffer is 0"); + + if (del_num < 0) + reiserfs_panic (0, "leaf_delete_items_entirely: 10215: del_num less than 0 (%d)", del_num); +#endif /* CONFIG_REISERFS_CHECK */ + + if (del_num == 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(bh))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + if (first < 0 || first + del_num > nr) + reiserfs_panic (0, "leaf_delete_items_entirely: 10220: first=%d, number=%d, there is %d items", first, del_num, nr); +#endif /* CONFIG_REISERFS_CHECK */ + + if (first == 0 && del_num == nr) { + /* this does not work */ + make_empty_node (bi); + + do_balance_mark_leaf_dirty 
(bi->tb, bh, 0); + return; + } + + ih = B_N_PITEM_HEAD (bh, first); + + /* location of unmovable item */ + j = (first == 0) ? bh->b_size : (ih-1)->ih_item_location; + + /* delete items */ + last_loc = ih[nr-1-first].ih_item_location; + last_removed_loc = ih[del_num-1].ih_item_location; + + memmove (bh->b_data + last_loc + j - last_removed_loc, + bh->b_data + last_loc, last_removed_loc - last_loc); + + /* delete item headers */ + memmove (ih, ih + del_num, (nr - first - del_num) * IH_SIZE); + + /* change item location */ + for (i = first; i < nr - del_num; i ++) + ih[i-first].ih_item_location += j - last_removed_loc; + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) - del_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) + (j - last_removed_loc + IH_SIZE * del_num)); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size -= j - last_removed_loc + IH_SIZE * del_num; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + + + + +/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ +void leaf_paste_entries ( + struct buffer_head * bh, + int item_num, + int before, + int new_entry_count, + struct reiserfs_de_head * new_dehs, + const char * records, + int paste_size + ) +{ + struct item_head * ih; + char * item; + struct reiserfs_de_head * deh; + char * insert_point; + int i, old_entry_num; + + if (new_entry_count == 0) + return; + + ih = B_N_PITEM_HEAD(bh, item_num); + +#ifdef CONFIG_REISERFS_CHECK + /* make sure, that item is directory, and there are enough records in it */ + if (!is_direntry_le_ih (ih)) + reiserfs_panic (0, "leaf_paste_entries: 10225: item is not directory item"); + + if (I_ENTRY_COUNT (ih) < before) + reiserfs_panic (0, "leaf_paste_entries: 10230: there are no entry we paste entries before. 
entry_count = %d, before = %d", + I_ENTRY_COUNT (ih), before); +#endif + + + /* first byte of dest item */ + item = bh->b_data + ih->ih_item_location; + + /* entry head array */ + deh = B_I_DEH (bh, ih); + + /* new records will be pasted at this point */ + insert_point = item + (before ? deh[before - 1].deh_location : (ih->ih_item_len - paste_size)); + + /* adjust locations of records that will be AFTER new records */ + for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i --) + deh[i].deh_location += DEH_SIZE * new_entry_count; + + /* adjust locations of records that will be BEFORE new records */ + for (i = 0; i < before; i ++) + deh[i].deh_location += paste_size; + + old_entry_num = I_ENTRY_COUNT(ih); + I_ENTRY_COUNT(ih) += new_entry_count; + + /* prepare space for pasted records */ + memmove (insert_point + paste_size, insert_point, item + (ih->ih_item_len - paste_size) - insert_point); + + /* copy new records */ + memcpy (insert_point + DEH_SIZE * new_entry_count, records, + paste_size - DEH_SIZE * new_entry_count); + + /* prepare space for new entry heads */ + deh += before; + memmove ((char *)(deh + new_entry_count), deh, insert_point - (char *)deh); + + /* copy new entry heads */ + deh = (struct reiserfs_de_head *)((char *)deh); + memcpy (deh, new_dehs, DEH_SIZE * new_entry_count); + + /* set locations of new records */ + for (i = 0; i < new_entry_count; i ++) + deh[i].deh_location += + (- new_dehs[new_entry_count - 1].deh_location + insert_point + DEH_SIZE * new_entry_count - item); + + + /* change item key if necessary (when we paste before the 0-th entry) */ + if (!before) + { +#ifdef CONFIG_REISERFS_CHECK +/* + if ( old_entry_num && COMP_SHORT_KEYS ((unsigned long *)&ih->ih_key.k_offset, + &(new_dehs->deh_offset)) <= 0) + reiserfs_panic (0, "leaf_paste_entries: 10235: new key must be less than old key"); +*/ +#endif + set_le_ih_k_offset (ih, le32_to_cpu (new_dehs->deh_offset)); +/* memcpy (&ih->ih_key.k_offset, + &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ + } + 
+#ifdef CONFIG_REISERFS_CHECK + { + int prev, next; + /* check record locations */ + deh = B_I_DEH (bh, ih); + for (i = 0; i < I_ENTRY_COUNT(ih); i ++) { + next = (i < I_ENTRY_COUNT(ih) - 1) ? deh[i + 1].deh_location : 0; + prev = (i != 0) ? deh[i - 1].deh_location : 0; + + if (prev && prev <= deh[i].deh_location) + reiserfs_warning ("vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)\n", + ih, deh + i - 1, i, deh + i); + if (next && next >= deh[i].deh_location) + reiserfs_warning ("vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)\n", + ih, i, deh + i, deh + i + 1); + } + } +#endif + +} + + + diff -u -r --new-file linux/fs/reiserfs/namei.c v2.4.0-test8/linux/fs/reiserfs/namei.c --- linux/fs/reiserfs/namei.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/namei.c Mon Sep 11 05:21:50 2000 @@ -0,0 +1,1185 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + + + + +// directory item contains array of entry headers. 
This performs +// binary search through that array +static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off) +{ + struct item_head * ih = de->de_ih; + struct reiserfs_de_head * deh = de->de_deh; + int rbound, lbound, j; + + lbound = 0; + rbound = I_ENTRY_COUNT (ih) - 1; + + for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { + if (off < deh_offset (deh + j)) { + rbound = j - 1; + continue; + } + if (off > deh_offset (deh + j)) { + lbound = j + 1; + continue; + } + // this is not name found, but matched third key component + de->de_entry_num = j; + return NAME_FOUND; + } + + de->de_entry_num = lbound; + return NAME_NOT_FOUND; +} + + +// +static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path) +{ + de->de_bh = get_bh (path); + de->de_ih = get_ih (path); + de->de_deh = B_I_DEH (de->de_bh, de->de_ih); + de->de_item_num = PATH_LAST_POSITION (path); +} + + +// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set +inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de) +{ + struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; + + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + + de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num); + de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? 
SD_SIZE : 0); + de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + le16_to_cpu (deh->deh_location); + if (de->de_name[de->de_namelen - 1] == 0) + de->de_namelen = strlen (de->de_name); +} + + +// what entry points to +static inline void set_de_object_key (struct reiserfs_dir_entry * de) +{ + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + de->de_dir_id = le32_to_cpu (de->de_deh[de->de_entry_num].deh_dir_id); + de->de_objectid = le32_to_cpu (de->de_deh[de->de_entry_num].deh_objectid); +} + + +static inline void store_de_entry_key (struct reiserfs_dir_entry * de) +{ + struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; + + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + + /* store key of the found entry */ + de->de_entry_key.version = ITEM_VERSION_1; + de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id); + de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid); + set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh)); + set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY); +} + + +/* first calls search_by_key, then, if item is not found looks for the + entry inside directory item found by search_by_key. (We assign a + key to each directory item, and place multiple entries in a single + directory item.) Fills the path to the entry, and to the entry + position in the item */ +/* The function is NOT SCHEDULE-SAFE! 
*/
+int search_by_entry_key (struct super_block * sb, struct cpu_key * key,
+ struct path * path, struct reiserfs_dir_entry * de)
+{
+ int retval;
+
+ retval = search_item (sb, key, path);
+ switch (retval) {
+ case ITEM_NOT_FOUND:
+ if (!PATH_LAST_POSITION (path)) {
+ reiserfs_warning ("vs-7000: search_by_entry_key: search_by_key returned item position == 0");
+ pathrelse(path) ;
+ return IO_ERROR ;
+ }
+ PATH_LAST_POSITION (path) --;
+
+ case ITEM_FOUND:
+ break;
+
+ case IO_ERROR:
+ return retval;
+
+ default:
+ pathrelse (path);
+ reiserfs_warning ("vs-7002: search_by_entry_key: no path to here");
+ return IO_ERROR;
+ }
+
+ set_de_item_location (de, path);
+
+#ifdef CONFIG_REISERFS_CHECK
+ if (!is_direntry_le_ih (de->de_ih) ||
+ COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) {
+ print_block (de->de_bh, 0, -1, -1);
+ reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or "
+ "does not belong to the same directory as key %k", de->de_ih, key);
+ }
+#endif /* CONFIG_REISERFS_CHECK */
+
+ /* binary search in directory item by third component of the
+ key. sets de->de_entry_num of de */
+ retval = bin_search_in_dir_item (de, cpu_key_k_offset (key));
+ path->pos_in_item = de->de_entry_num;
+ if (retval != NAME_NOT_FOUND) {
+ // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
+ set_de_name_and_namelen (de);
+ set_de_object_key (de);
+ }
+ return retval;
+}
+
+
+
+/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
+static __u32 get_third_component (struct super_block * s,
+ const char * name, int len)
+{
+ __u32 res;
+
+ if (!len || (len == 1 && name[0] == '.'))
+ return DOT_OFFSET;
+ if (len == 2 && name[0] == '.' && name[1] == '.')
+ return DOT_DOT_OFFSET;
+
+ res = s->u.reiserfs_sb.s_hash_function (name, len);
+
+ // take bits from 7-th to 30-th including both bounds
+ res = GET_HASH_VALUE(res);
+ if (res == 0)
+ // needed to have no names before "." and ".." 
those have hash + // value == 0 and generation conters 1 and 2 accordingly + res = 128; + return res + MAX_GENERATION_NUMBER; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +static int reiserfs_match (struct reiserfs_dir_entry * de, + const char * name, int namelen) +{ + int retval = NAME_NOT_FOUND; + + if ((namelen == de->de_namelen) && + !memcmp(de->de_name, name, de->de_namelen)) + retval = (de_visible (de->de_deh + de->de_entry_num) ? NAME_FOUND : NAME_FOUND_INVISIBLE); + + return retval; +} + + +/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ +static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de, + const char * name, int namelen) +{ + struct reiserfs_de_head * deh = de->de_deh; + int retval; + int i; + + i = de->de_entry_num; + + if (i == I_ENTRY_COUNT (de->de_ih) || + GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) { + i --; + } + +#ifdef CONFIG_REISERFS_CHECK + if (de->de_deh != B_I_DEH (de->de_bh, de->de_ih)) + reiserfs_panic (0, "vs-7010: linear_search_in_dir_item: array of entry headers not found"); +#endif /* CONFIG_REISERFS_CHECK */ + + deh += i; + + for (; i >= 0; i --, deh --) { + if (GET_HASH_VALUE (deh_offset (deh)) != + GET_HASH_VALUE (cpu_key_k_offset (key))) { + // hash value does not match, no need to check whole name + return NAME_NOT_FOUND; + } + + /* mark, that this generation number is used */ + if (de->de_gen_number_bit_string) + set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), de->de_gen_number_bit_string); + + // calculate pointer to name and namelen + de->de_entry_num = i; + set_de_name_and_namelen (de); + + if ((retval = reiserfs_match (de, name, 
namelen)) != NAME_NOT_FOUND) { + // de's de_name, de_namelen, de_recordlen are set. Fill the rest: + + // key of pointed object + set_de_object_key (de); + + store_de_entry_key (de); + + // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE + return retval; + } + } + + if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0) + /* we have reached left most entry in the node. In common we + have to go to the left neighbor, but if generation counter + is 0 already, we know for sure, that there is no name with + the same hash value */ + // FIXME: this work correctly only because hash value can not + // be 0. Btw, in case of Yura's hash it is probably possible, + // so, this is a bug + return NAME_NOT_FOUND; + +#ifdef CONFIG_REISERFS_CHECK + if (de->de_item_num) + reiserfs_panic (0, "vs-7015: linear_search_in_dir_item: " + "two diritems of the same directory in one node?"); +#endif /* CONFIG_REISERFS_CHECK */ + + return GOTO_PREVIOUS_ITEM; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND +// FIXME: should add something like IOERROR +static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, + struct path * path_to_entry, struct reiserfs_dir_entry * de) +{ + struct cpu_key key_to_search; + int retval; + + + if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return NAME_NOT_FOUND; + + /* we will search for this key in the tree */ + make_cpu_key (&key_to_search, dir, + get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); + + while (1) { + retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de); + if (retval == IO_ERROR) + // FIXME: still has to be dealt with + + /* I want you to conform to our error + printing standard. How many times + do I have to ask? -Hans */ + + BUG (); + + /* compare names for all entries having given hash value */ + retval = linear_search_in_dir_item (&key_to_search, de, name, namelen); + if (retval != GOTO_PREVIOUS_ITEM) { + /* there is no need to scan directory anymore. Given entry found or does not exist */ + path_to_entry->pos_in_item = de->de_entry_num; + return retval; + } + + /* there is left neighboring item of this directory and given entry can be there */ + set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1); + pathrelse (path_to_entry); + + } /* while (1) */ +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry) +{ + int retval; + struct inode * inode = 0; + struct reiserfs_dir_entry de; + INITIALIZE_PATH (path_to_entry); + + reiserfs_check_lock_depth("lookup") ; + + if (dentry->d_name.len > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return ERR_PTR(-ENAMETOOLONG); + + de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de); + pathrelse (&path_to_entry); + if (retval == NAME_FOUND) { + inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + if (!inode) { + return ERR_PTR(-EACCES); + } + } + + d_add(dentry, inode); + return NULL; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +/* add entry to the directory (entry can be hidden). Does not mark dir + inode dirty, do it after successesfull call to it */ +static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir, + const char * name, int namelen, struct inode * inode, + int visible) +{ + struct cpu_key entry_key; + struct reiserfs_de_head * deh; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + char bit_string [MAX_GENERATION_NUMBER / 8 + 1]; + int gen_number; + char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc + if we create file with short name */ + char * buffer; + int buflen, paste_size; + int retval; + + + /* cannot allow items to be added into a busy deleted directory */ + if (!namelen) + return -EINVAL; + + if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return -ENAMETOOLONG; + + /* each entry has unique key. 
compose it */ + make_cpu_key (&entry_key, dir, + get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); + + /* get memory for composing the entry */ + buflen = DEH_SIZE + ROUND_UP (namelen); + if (buflen > sizeof (small_buf)) { + buffer = reiserfs_kmalloc (buflen, GFP_BUFFER, dir->i_sb); + if (buffer == 0) + return -ENOMEM; + } else + buffer = small_buf; + + paste_size = (old_format_only (dir->i_sb)) ? (DEH_SIZE + namelen) : buflen; + + /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ + deh = (struct reiserfs_de_head *)buffer; + deh->deh_location = 0; + deh->deh_offset = cpu_to_le32 (cpu_key_k_offset (&entry_key)); + deh->deh_state = 0; + /* put key (ino analog) to de */ + deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id; + deh->deh_objectid = INODE_PKEY (inode)->k_objectid; + + /* copy name */ + memcpy ((char *)(deh + 1), name, namelen); + /* padd by 0s to the 4 byte boundary */ + padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen); + + /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ + mark_de_without_sd (deh); + visible ? mark_de_visible (deh) : mark_de_hidden (deh); + + /* find the proper place for the new entry */ + memset (bit_string, 0, sizeof (bit_string)); + de.de_gen_number_bit_string = bit_string; + if (reiserfs_find_entry (dir, name, namelen, &path, &de) == NAME_FOUND) { + if (buffer != small_buf) + reiserfs_kfree (buffer, buflen, dir->i_sb); + pathrelse (&path); + return -EEXIST; + } + + if (find_first_nonzero_bit (bit_string, MAX_GENERATION_NUMBER + 1) < MAX_GENERATION_NUMBER + 1) { + /* there are few names with given hash value */ + gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1); + if (gen_number > MAX_GENERATION_NUMBER) { + /* there is no free generation number */ + reiserfs_warning ("reiserfs_add_entry: Congratulations! 
we have got hash function screwed up\n");
+ if (buffer != small_buf)
+ reiserfs_kfree (buffer, buflen, dir->i_sb);
+ pathrelse (&path);
+ return -EHASHCOLLISION;//EBADSLT
+ }
+ /* adjust offset of directory entry */
+ deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (deh_offset (deh), gen_number));
+ set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
+
+ /* find place for new entry */
+ if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) == NAME_FOUND) {
+ reiserfs_warning ("vs-7032: reiserfs_add_entry: "
+ "entry with this key (%k) already exists", &entry_key);
+ if (buffer != small_buf)
+ reiserfs_kfree (buffer, buflen, dir->i_sb);
+ pathrelse (&path);
+ return -EHASHCOLLISION;
+ }
+ } else {
+ deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (le32_to_cpu (deh->deh_offset), 0));
+ set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
+ }
+
+ /* perform the insertion of the entry that we have prepared */
+ retval = reiserfs_paste_into_item (th, &path, &entry_key, buffer, paste_size);
+ if (buffer != small_buf)
+ reiserfs_kfree (buffer, buflen, dir->i_sb);
+ if (retval) {
+ reiserfs_check_path(&path) ;
+ return retval;
+ }
+
+ dir->i_size += paste_size;
+ dir->i_blocks = ((dir->i_size + 511) >> 9);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ if (!S_ISDIR (inode->i_mode) && visible)
+ // reiserfs_mkdir or reiserfs_rename will do that by itself
+ reiserfs_update_sd (th, dir);
+
+ reiserfs_check_path(&path) ;
+ return 0;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. Its subfunctions contain no code
+// used as a template unless they are so labeled. 
+// +int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) +{ + int retval; + struct inode * inode; + int windex ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 ; + struct reiserfs_transaction_handle th ; + + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + th.t_caller = "create" ; + windex = push_journal_writer("reiserfs_create") ; + inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations ; + + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + // FIXME: should we put iput here and have stat data deleted + // in the same transactioin + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_mknod") ; + + inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + init_special_inode(inode, mode, rdev) ; + + //FIXME: needed for block and char devices only + reiserfs_update_sd (&th, inode); + + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_mkdir") ; + + if (dir->i_nlink >= REISERFS_LINK_MAX) { + //FIXME: sd_nlink is 32 bit now + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput(inode) ; + return -EMLINK; + } + /* inc the link count now, so another writer doesn't overflow it while + ** we sleep later on. + */ + dir->i_nlink ++; + + mode = S_IFDIR | mode; + inode = reiserfs_new_inode (&th, dir, mode, 0/*symlink*/, + old_format_only (dir->i_sb) ? EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, + dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + dir->i_nlink-- ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + + // note, _this_ add_entry will not update dir's stat data + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink = 0; + dir->i_nlink-- ; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + // the above add_entry did not update dir's stat data + reiserfs_update_sd (&th, dir); + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + +static inline int reiserfs_empty_dir(struct inode *inode) { + /* we can cheat because an old format dir cannot have + ** EMPTY_DIR_SIZE, and a new format dir cannot have + ** EMPTY_DIR_SIZE_V1. 
So, if the inode is either size, + ** regardless of disk format version, the directory is empty. + */ + if (inode->i_size != EMPTY_DIR_SIZE && + inode->i_size != EMPTY_DIR_SIZE_V1) { + return 0 ; + } + return 1 ; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_rmdir") ; + + de.de_gen_number_bit_string = 0; + if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_rmdir; + } + inode = dentry->d_inode; + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_rmdir; + } + if (!reiserfs_empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + + /* cut entry from dir directory */ + retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, + NULL, /* page */ + 0/*new file size - not used here*/); + if (retval < 0) + goto end_rmdir; + + if (inode->i_nlink != 2) + printk ("reiserfs_rmdir: empty directory has nlink != 2 (%d)\n", inode->i_nlink); + + inode->i_nlink = 0; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + dir->i_nlink --; + dir->i_size -= (DEH_SIZE + de.de_entrylen); + dir->i_blocks = ((dir->i_size + 511) >> 9); + reiserfs_update_sd (&th, dir); + + pop_journal_writer(windex) ; + 
journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return 0; + + end_rmdir: + /* we must release path, because we did not call + reiserfs_cut_from_item, or reiserfs_cut_from_item does not + release path if operation was not complete */ + pathrelse (&path); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_unlink (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct reiserfs_dir_entry de; + INITIALIZE_PATH (path); + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_unlink") ; + + de.de_gen_number_bit_string = 0; + if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_unlink; + } + inode = dentry->d_inode; + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_unlink; + } + + if (!inode->i_nlink) { + printk("reiserfs_unlink: deleting nonexistent file (%s:%lu), %d\n", + kdevname(inode->i_dev), inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + + retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0); + if (retval < 0) + goto end_unlink; + + inode->i_nlink--; + inode->i_ctime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + dir->i_size -= (de.de_entrylen + DEH_SIZE); + dir->i_blocks = ((dir->i_size + 511) >> 9); + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + 
reiserfs_update_sd (&th, dir); + + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return 0; + + end_unlink: + pathrelse (&path); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return retval; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const char * symname) +{ + int retval; + struct inode * inode; + char * name; + int item_len; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + + item_len = ROUND_UP (strlen (symname)); + if (item_len > MAX_ITEM_LEN (dir->i_sb->s_blocksize)) { + iput(inode) ; + return -ENAMETOOLONG; + } + + name = kmalloc (item_len, GFP_BUFFER); + if (!name) { + iput(inode) ; + return -ENOMEM; + } + memcpy (name, symname, strlen (symname)); + padd_item (name, item_len, strlen (symname)); + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_symlink") ; + + inode = reiserfs_new_inode (&th, dir, S_IFLNK | S_IRWXUGO, name, strlen (symname), dentry, + inode, &retval); + kfree (name); + if (inode == 0) { /* reiserfs_new_inode iputs for us */ + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + + // must be sure this inode is written with this transaction + // + //reiserfs_update_sd (&th, inode, READ_BLOCKS); + + retval = reiserfs_add_entry (&th, dir, 
dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry) +{ + int retval; + struct inode *inode = old_dentry->d_inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + if (inode->i_nlink >= REISERFS_LINK_MAX) { + //FIXME: sd_nlink is 32 bit for new files + return -EMLINK; + } + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_link") ; + + /* create new entry */ + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_nlink++; + inode->i_ctime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + atomic_inc(&inode->i_count) ; + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// de contains information pointing to an entry which +static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de) +{ + struct reiserfs_dir_entry tmp = *de; + + // recalculate pointer to name and name length + 
set_de_name_and_namelen (&tmp);
+ // FIXME: could check more
+ if (tmp.de_namelen != len || memcmp (name, de->de_name, len))
+ return 0;
+ return 1;
+}
+
+
+static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode)
+{
+ if (!de_still_valid (name, len, de))
+ return 0;
+
+ if (inode) {
+ if (!de_visible (de->de_deh + de->de_entry_num))
+ reiserfs_panic (0, "vs-7042: entry_points_to_object: entry must be visible");
+ return (de->de_objectid == inode->i_ino) ? 1 : 0;
+ }
+
+ /* this must be added hidden entry */
+ if (de_visible (de->de_deh + de->de_entry_num))
+ reiserfs_panic (0, "vs-7043: entry_points_to_object: entry must be visible");
+
+ return 1;
+}
+
+
+/* sets key of object the entry has to point to */
+static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct key * key)
+{
+ de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id;
+ de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. Its subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+/*
+ * process, that is going to call fix_nodes/do_balance must hold only
+ * one path. 
If it holds 2 or more, it can get into endless waiting in + * get_empty_nodes or its clones + */ +int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir, struct dentry *new_dentry) +{ + int retval; + INITIALIZE_PATH (old_entry_path); + INITIALIZE_PATH (new_entry_path); + INITIALIZE_PATH (dot_dot_entry_path); + struct item_head new_entry_ih, old_entry_ih ; + struct reiserfs_dir_entry old_de, new_de, dot_dot_de; + struct inode * old_inode, * new_inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + // make sure, that oldname still exists and points to an object we + // are going to rename + old_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, + &old_entry_path, &old_de); + pathrelse (&old_entry_path); + if (retval != NAME_FOUND && old_de.de_objectid != old_inode->i_ino) { + // FIXME: IO error is possible here, but as ext2_rename does + // not detect it we do not as well + + return -ENOENT; + } + + if (S_ISDIR(old_inode->i_mode)) { + // make sure, that directory being renamed has correct ".." + // and that its new parent directory has not too many links + // already + + if (new_inode) { + if (!reiserfs_empty_dir(new_inode)) { + return -ENOTEMPTY; + } + } + + /* directory is renamed, its parent directory will be changed, + ** so find ".." entry + */ + dot_dot_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de); + pathrelse (&dot_dot_entry_path); + if (retval != NAME_FOUND) + return -EIO; + + /* inode number of .. 
must equal old_dir->i_ino */ + if (dot_dot_de.de_objectid != old_dir->i_ino) + return -EIO; + + if (!new_inode && new_dir->i_nlink >= REISERFS_LINK_MAX) + return -EMLINK; + } + + + journal_begin(&th, old_dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_rename") ; + + + /* add new entry (or find the existing one) */ + retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len, + old_inode, 0); + if (retval == -EEXIST) { + // FIXME: is it possible, that new_inode == 0 here? If yes, it + // is not clear how does ext2 handle that + if (!new_inode) { + printk ("reiserfs_rename: new entry is found, new inode == 0\n"); + BUG (); + } + } else if (retval) { + pop_journal_writer(windex) ; + journal_end(&th, old_dir->i_sb, jbegin_count) ; + return retval; + } + + + while (1) { + // look for old name using corresponding entry key (found by reiserfs_find_entry) + if (search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key, &old_entry_path, &old_de) != NAME_FOUND) + BUG (); + + copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ; + + // look for new name by reiserfs_find_entry + new_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, + &new_entry_path, &new_de); + if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) + BUG (); + + copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ; + + reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ; + + if (S_ISDIR(old_inode->i_mode)) { + if (search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, &dot_dot_entry_path, &dot_dot_de) != NAME_FOUND) + BUG (); + // node containing ".." gets into transaction + reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ; + } + + /* sanity checking before doing the rename - avoid races many + ** of the above checks could have scheduled. We have to be + ** sure our items haven't been shifted by another process. 
+ */ + if (!entry_points_to_object(new_dentry->d_name.name, + new_dentry->d_name.len, + &new_de, new_inode) || + item_moved(&new_entry_ih, &new_entry_path) || + item_moved(&old_entry_ih, &old_entry_path) || + !entry_points_to_object (old_dentry->d_name.name, + old_dentry->d_name.len, + &old_de, old_inode)) { + reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); + if (S_ISDIR(old_inode->i_mode)) + reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); +#if 0 + // FIXME: do we need this? shouldn't we simply continue? + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + /*current->counter = 0;*/ + schedule(); +#endif + continue; + } + +#ifdef CONFIG_REISERFS_CHECK + if (S_ISDIR(old_inode->i_mode) && + (!entry_points_to_object ("..", 2, &dot_dot_de, old_dir) || + !reiserfs_buffer_prepared(dot_dot_de.de_bh))) { + // this should be not changed + BUG (); + } +#endif + + break; + } + + /* ok, all the changes can be done in one fell swoop when we + have claimed all the buffers needed.*/ + + mark_de_visible (new_de.de_deh + new_de.de_entry_num); + set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode)); + journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh); + + mark_de_hidden (old_de.de_deh + old_de.de_entry_num); + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; + new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME; + + if (new_inode) { + // adjust link number of the victim + new_inode->i_nlink--; + new_inode->i_ctime = CURRENT_TIME; + } + + if (S_ISDIR(old_inode->i_mode)) { + //if (dot_dot_de.de_bh) { + // adjust ".." of renamed directory + set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir)); + journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh); + + old_dir->i_nlink--; + if (new_inode) { + new_inode->i_nlink--; + } else { + new_dir->i_nlink++; + } + } + + // looks like in 2.3.99pre3 brelse is atomic. 
so we can use pathrelse + pathrelse (&new_entry_path); + pathrelse (&dot_dot_entry_path); + + // FIXME: this reiserfs_cut_from_item's return value may screw up + // anybody, but it will panic if will not be able to find the + // entry. This needs one more clean up + if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0) + reiserfs_warning ("vs-: reiserfs_rename: coudl not cut old name. Fsck later?\n"); + + old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; + old_dir->i_blocks = ((old_dir->i_size + 511) >> 9); + + reiserfs_update_sd (&th, old_dir); + reiserfs_update_sd (&th, new_dir); + if (new_inode) + reiserfs_update_sd (&th, new_inode); + + pop_journal_writer(windex) ; + journal_end(&th, old_dir->i_sb, jbegin_count) ; + return 0; +} + diff -u -r --new-file linux/fs/reiserfs/objectid.c v2.4.0-test8/linux/fs/reiserfs/objectid.c --- linux/fs/reiserfs/objectid.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/objectid.c Sun May 21 17:26:44 2000 @@ -0,0 +1,210 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +#ifdef __KERNEL__ + +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/sched.h> +#include <linux/reiserfs_fs.h> + +#else + +#include "nokernel.h" + +#endif + + +// find where objectid map starts +#define objectid_map(s,rs) (old_format_only (s) ? \ + (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) :\ + (__u32 *)(rs + 1)) + + +#ifdef CONFIG_REISERFS_CHECK + +static void check_objectid_map (struct super_block * s, __u32 * map) +{ + if (le32_to_cpu (map[0]) != 1) + reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted"); + + // FIXME: add something else here +} + +#endif + + +/* When we allocate objectids we allocate the first unused objectid. + Each sequence of objectids in use (the odd sequences) is followed + by a sequence of objectids not in use (the even sequences). 
We + only need to record the last objectid in each of these sequences + (both the odd and even sequences) in order to fully define the + boundaries of the sequences. A consequence of allocating the first + objectid not in use is that under most conditions this scheme is + extremely compact. The exception is immediately after a sequence + of operations which deletes a large number of objects of + non-sequential objectids, and even then it will become compact + again as soon as more objects are created. Note that many + interesting optimizations of layout could result from complicating + objectid assignment, but we have deferred making them for now. */ + + +/* get unique object identifier */ +__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + __u32 * map = objectid_map (s, rs); + __u32 unused_objectid; + + +#ifdef CONFIG_REISERFS_CHECK + check_objectid_map (s, map); +#endif + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + /* comment needed -Hans */ + unused_objectid = le32_to_cpu (map[1]); + if (unused_objectid == U32_MAX) { + printk ("REISERFS: get_objectid: no more object ids\n"); + reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)) ; + return 0; + } + + /* This incrementation allocates the first unused objectid. That + is to say, the first entry on the objectid map is the first + unused objectid, and by incrementing it we use it. See below + where we check to see if we eliminated a sequence of unused + objectids.... */ + map[1] = cpu_to_le32 (unused_objectid + 1); + + /* Now we check to see if we eliminated the last remaining member of + the first even sequence (and can eliminate the sequence by + eliminating its last objectid from oids), and can collapse the + first two odd sequences into one sequence. If so, then the net + result is to eliminate a pair of objectids from oids. 
We do this + by shifting the entire map to the left. */ + if (le16_to_cpu (rs->s_oid_cursize) > 2 && map[1] == map[2]) { + memmove (map + 1, map + 3, (le16_to_cpu (rs->s_oid_cursize) - 3) * sizeof(__u32)); + //rs->s_oid_cursize -= 2; + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) - 2); + } + + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + return unused_objectid; +} + + +/* makes object identifier unused */ +void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, + __u32 objectid_to_release) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + __u32 * map = objectid_map (s, rs); + int i = 0; + + //return; +#ifdef CONFIG_REISERFS_CHECK + check_objectid_map (s, map); +#endif + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + + + /* start at the beginning of the objectid map (i = 0) and go to + the end of it (i = disk_sb->s_oid_cursize). Linear search is + what we use, though it is possible that binary search would be + more efficient after performing lots of deletions (which is + when oids is large.) We only check even i's. */ + while (i < le16_to_cpu (rs->s_oid_cursize)) { + if (objectid_to_release == le32_to_cpu (map[i])) { + /* This incrementation unallocates the objectid. */ + //map[i]++; + map[i] = cpu_to_le32 (le32_to_cpu (map[i]) + 1); + + /* Did we unallocate the last member of an odd sequence, and can shrink oids? 
*/ + if (map[i] == map[i+1]) { + /* shrink objectid map */ + memmove (map + i, map + i + 2, + (le16_to_cpu (rs->s_oid_cursize) - i - 2) * sizeof (__u32)); + //disk_sb->s_oid_cursize -= 2; + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) - 2); + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (rs->s_oid_cursize) < 2 || + le16_to_cpu (rs->s_oid_cursize) > le16_to_cpu (rs->s_oid_maxsize)) + reiserfs_panic (s, "vs-15005: reiserfs_release_objectid: " + "objectid map corrupted cur_size == %d (max == %d)", + le16_to_cpu (rs->s_oid_cursize), le16_to_cpu (rs->s_oid_maxsize)); +#endif + } + return; + } + + if (objectid_to_release > le32_to_cpu (map[i]) && + objectid_to_release < le32_to_cpu (map[i + 1])) { + /* size of objectid map is not changed */ + if (objectid_to_release + 1 == le32_to_cpu (map[i + 1])) { + //objectid_map[i+1]--; + map[i + 1] = cpu_to_le32 (le32_to_cpu (map[i + 1]) - 1); + return; + } + + if (rs->s_oid_cursize == rs->s_oid_maxsize) + /* objectid map must be expanded, but there is no space */ + return; + + /* expand the objectid map*/ + memmove (map + i + 3, map + i + 1, + (le16_to_cpu (rs->s_oid_cursize) - i - 1) * sizeof(__u32)); + map[i + 1] = cpu_to_le32 (objectid_to_release); + map[i + 2] = cpu_to_le32 (objectid_to_release + 1); + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) + 2); + return; + } + i += 2; + } + + reiserfs_warning ("vs-15010: reiserfs_release_objectid: tried to free free object id (%lu)", + objectid_to_release); +} + + +int reiserfs_convert_objectid_map_v1(struct super_block *s) { + struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK (s); + int cur_size = le16_to_cpu(disk_sb->s_oid_cursize) ; + int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ; + int old_max = le16_to_cpu(disk_sb->s_oid_maxsize) ; + struct reiserfs_super_block_v1 *disk_sb_v1 ; + __u32 *objectid_map, *new_objectid_map ; + int i ; + + disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); 
+ objectid_map = (__u32 *)(disk_sb_v1 + 1) ; + new_objectid_map = (__u32 *)(disk_sb + 1) ; + + if (cur_size > new_size) { + /* mark everyone used that was listed as free at the end of the objectid + ** map + */ + objectid_map[new_size - 1] = objectid_map[cur_size - 1] ; + disk_sb->s_oid_cursize = cpu_to_le16(new_size) ; + } + /* move the smaller objectid map past the end of the new super */ + for (i = new_size - 1 ; i >= 0 ; i--) { + objectid_map[i + (old_max - new_size)] = objectid_map[i] ; + } + + + /* set the max size so we don't overflow later */ + disk_sb->s_oid_maxsize = cpu_to_le16(new_size) ; + + /* finally, zero out the unused chunk of the new super */ + memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)) ; + return 0 ; +} + diff -u -r --new-file linux/fs/reiserfs/prints.c v2.4.0-test8/linux/fs/reiserfs/prints.c --- linux/fs/reiserfs/prints.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/prints.c Mon Sep 11 05:21:50 2000 @@ -0,0 +1,878 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +#ifdef __KERNEL__ + +#include <stdarg.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/reiserfs_fs.h> +#include <linux/string.h> + +#else + +#include "nokernel.h" +#include <stdarg.h> +#include <limits.h> + +#endif + +static char error_buf[1024]; +static char fmt_buf[1024]; +static char off_buf[80]; + + +static char * cpu_offset (struct cpu_key * key) +{ + if (cpu_key_k_type(key) == TYPE_DIRENTRY) + sprintf (off_buf, "%Lu(%Lu)", GET_HASH_VALUE (cpu_key_k_offset (key)), + GET_GENERATION_NUMBER (cpu_key_k_offset (key))); + else + sprintf (off_buf, "0x%Lx", cpu_key_k_offset (key)); + return off_buf; +} + + +static char * le_offset (struct key * key) +{ + int version; + + version = le_key_version (key); + if (le_key_k_type (version, key) == TYPE_DIRENTRY) + sprintf (off_buf, "%Lu(%Lu)", GET_HASH_VALUE (le_key_k_offset (version, key)), + GET_GENERATION_NUMBER (le_key_k_offset 
(version, key))); + else + sprintf (off_buf, "0x%Lx", le_key_k_offset (version, key)); + return off_buf; +} + + +static char * cpu_type (struct cpu_key * key) +{ + if (cpu_key_k_type (key) == TYPE_STAT_DATA) + return "SD"; + if (cpu_key_k_type (key) == TYPE_DIRENTRY) + return "DIR"; + if (cpu_key_k_type (key) == TYPE_DIRECT) + return "DIRECT"; + if (cpu_key_k_type (key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; +} + + +static char * le_type (struct key * key) +{ + int version; + + version = le_key_version (key); + + if (le_key_k_type (version, key) == TYPE_STAT_DATA) + return "SD"; + if (le_key_k_type (version, key) == TYPE_DIRENTRY) + return "DIR"; + if (le_key_k_type (version, key) == TYPE_DIRECT) + return "DIRECT"; + if (le_key_k_type (version, key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; +} + + +/* %k */ +static void sprintf_le_key (char * buf, struct key * key) +{ + if (key) + sprintf (buf, "[%d %d %s %s]", le32_to_cpu (key->k_dir_id), + le32_to_cpu (key->k_objectid), le_offset (key), le_type (key)); + else + sprintf (buf, "[NULL]"); +} + + +/* %K */ +static void sprintf_cpu_key (char * buf, struct cpu_key * key) +{ + if (key) + sprintf (buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, cpu_offset (key), cpu_type (key)); + else + sprintf (buf, "[NULL]"); +} + + +static void sprintf_item_head (char * buf, struct item_head * ih) +{ + if (ih) { + sprintf (buf, "%s", (ih_version (ih) == ITEM_VERSION_2) ? "*NEW* " : "*OLD*"); + sprintf_le_key (buf + strlen (buf), &(ih->ih_key)); + sprintf (buf + strlen (buf), ", item_len %d, item_location %d, " + "free_space(entry_count) %d", + ih->ih_item_len, ih->ih_item_location, ih_free_space (ih)); + } else + sprintf (buf, "[NULL]"); +} + + +static void sprintf_direntry (char * buf, struct reiserfs_dir_entry * de) +{ + char name[20]; + + memcpy (name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); + name [de->de_namelen > 19 ? 
19 : de->de_namelen] = 0; + sprintf (buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); +} + + +static void sprintf_block_head (char * buf, struct buffer_head * bh) +{ + sprintf (buf, "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL (bh), B_NR_ITEMS (bh), B_FREE_SPACE (bh)); +#if 0 + if (B_LEVEL (bh) == DISK_LEAF_NODE_LEVEL) + sprintf_le_key (buf + strlen (buf), B_PRIGHT_DELIM_KEY (bh)); +#endif +} + + +static void sprintf_buffer_head (char * buf, struct buffer_head * bh) +{ + sprintf (buf, "dev %s, size %d, blocknr %ld, count %d, list %d, state 0x%lx, page %p, (%s, %s, %s)", + kdevname (bh->b_dev), bh->b_size, bh->b_blocknr, atomic_read (&(bh->b_count)), bh->b_list, + bh->b_state, bh->b_page, + buffer_uptodate (bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty (bh) ? "DIRTY" : "CLEAN", + buffer_locked (bh) ? "LOCKED" : "UNLOCKED"); +} + + +static void sprintf_disk_child (char * buf, struct disk_child * dc) +{ + sprintf (buf, "[dc_number=%d, dc_size=%u]", dc->dc_block_number, dc->dc_size); +} + + +static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip) +{ + char * k = fmt; + + *skip = 0; + + while (1) { + k = strstr (k, "%"); + if (!k) + break; + if (k && (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || + k[1] == 'z' || k[1] == 'b' || k[1] == 'y')) { + *what = k[1]; + break; + } + (*skip) ++; + k ++; + } + return k; +} + + +/* While debugging reiserfs we used to print out a lot of different + variables, like keys, item headers, buffer heads etc. Values of + most fields matter. So it took a long time just to write + an appropriate printk. With this reiserfs_warning you can use format + specification for complex structures like you used to do with + printfs for integers, doubles and pointers. 
For instance, to print + out key structure you have to write just: + reiserfs_warning ("bad key %k", key); + instead of + printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, + key->k_offset, key->k_uniqueness); +*/ + +#define do_reiserfs_warning \ +{\ + char * fmt1 = fmt_buf;\ + va_list args;\ + int i, j;\ + char * k;\ + char * p = error_buf;\ + int what, skip;\ +\ + strcpy (fmt1, fmt);\ + va_start(args, fmt);\ +\ + while (1) {\ + k = is_there_reiserfs_struct (fmt1, &what, &skip);\ + if (k != 0) {\ + *k = 0;\ + p += vsprintf (p, fmt1, args);\ +\ + for (i = 0; i < skip; i ++)\ + j = va_arg (args, int);\ +\ + switch (what) {\ + case 'k':\ + sprintf_le_key (p, va_arg(args, struct key *));\ + break;\ + case 'K':\ + sprintf_cpu_key (p, va_arg(args, struct cpu_key *));\ + break;\ + case 'h':\ + sprintf_item_head (p, va_arg(args, struct item_head *));\ + break;\ + case 't':\ + sprintf_direntry (p, va_arg(args, struct reiserfs_dir_entry *));\ + break;\ + case 'y':\ + sprintf_disk_child (p, va_arg(args, struct disk_child *));\ + break;\ + case 'z':\ + sprintf_block_head (p, va_arg(args, struct buffer_head *));\ + break;\ + case 'b':\ + sprintf_buffer_head (p, va_arg(args, struct buffer_head *));\ + break;\ + }\ + p += strlen (p);\ + fmt1 = k + 2;\ + } else {\ + i = vsprintf (p, fmt1, args);\ + break;\ + }\ + }\ +\ + va_end(args);\ +} + + +/* in addition to usual conversion specifiers this accepts reiserfs + specific conversion specifiers: + %k to print little endian key, + %K to print cpu key, + %h to print item_head, + %t to print directory entry + %z to print block head (arg must be struct buffer_head * + %b to print buffer_head +*/ +void reiserfs_warning (const char * fmt, ...) +{ + do_reiserfs_warning; + /* console_print (error_buf); */ + printk ("%s", error_buf); +} + +void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) 
+{ +#ifdef CONFIG_REISERFS_CHECK + do_reiserfs_warning; + printk ("%s", error_buf); +#else + ; +#endif +} + +/* The format: + + maintainer-errorid: [function-name:] message + + where errorid is unique to the maintainer and function-name is + optional, is recommended, so that anyone can easily find the bug + with a simple grep for the short to type string + maintainer-errorid. Don't bother with reusing errorids, there are + lots of numbers out there. + + Example: + + reiserfs_panic( + p_sb, "reiser-29: reiserfs_new_blocknrs: " + "one of search_start or rn(%d) is equal to MAX_B_NUM," + "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", + rn, bh + ); + + Regular panic()s sometimes clear the screen before the message can + be read, thus the need for the while loop. + + Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it + pointless complexity): + + panics in reiserfs_fs.h have numbers from 1000 to 1999 + super.c 2000 to 2999 + preserve.c 3000 to 3999 + bitmap.c 4000 to 4999 + stree.c 5000 to 5999 + prints.c 6000 to 6999 + namei.c 7000 to 7999 + fix_nodes.c 8000 to 8999 + dir.c 9000 to 9999 + lbalance.c 10000 to 10999 + ibalance.c 11000 to 11999 not ready + do_balan.c 12000 to 12999 + inode.c 13000 to 13999 + file.c 14000 to 14999 + objectid.c 15000 - 15999 + buffer.c 16000 - 16999 + symlink.c 17000 - 17999 + + . */ + + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + +void reiserfs_panic (struct super_block * sb, const char * fmt, ...) 
+{ +#ifdef __KERNEL__ + show_reiserfs_locks() ; +#endif + do_reiserfs_warning; + printk ("%s", error_buf); + BUG (); + // console_print (error_buf); + // for (;;); + +#ifdef __KERNEL__ + + /* comment before release */ + //for (;;); + +#if 0 /* this is not needed, the state is ignored */ + if (sb && !(sb->s_flags & MS_RDONLY)) { + sb->u.reiserfs_sb.s_mount_state |= REISERFS_ERROR_FS; + sb->u.reiserfs_sb.s_rs->s_state = REISERFS_ERROR_FS; + + mark_buffer_dirty(sb->u.reiserfs_sb.s_sbh) ; + sb->s_dirt = 1; + } +#endif + + /* this is to prevent panic from syncing this filesystem */ + if (sb && sb->s_lock) + sb->s_lock=0; + if (sb) + sb->s_flags |= MS_RDONLY; + + panic ("REISERFS: panic (device %s): %s\n", + sb ? kdevname(sb->s_dev) : "sb == 0", error_buf); +#else + exit (0); +#endif +} + + +void print_virtual_node (struct virtual_node * vn) +{ + int i; + struct virtual_item * vi; + + printk ("VIRTUAL NODE CONTAINS %d items, has size %d,%s,%s, ITEM_POS=%d POS_IN_ITEM=%d MODE=\'%c\'\n", + vn->vn_nr_item, vn->vn_size, + (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE )? "left mergeable" : "", + (vn->vn_vi[vn->vn_nr_item - 1].vi_type & VI_TYPE_RIGHT_MERGEABLE) ? "right mergeable" : "", + vn->vn_affected_item_num, vn->vn_pos_in_item, vn->vn_mode); + + vi = vn->vn_vi; + for (i = 0; i < vn->vn_nr_item; i ++, vi ++) + op_print_vi (vi); + +} + + +void print_path (struct tree_balance * tb, struct path * path) +{ + int h = 0; + struct buffer_head * bh; + + if (tb) { + while (tb->insert_size[h]) { + bh = PATH_H_PBUFFER (path, h); + printk ("block %lu (level=%d), position %d\n", bh ? bh->b_blocknr : 0, + bh ? B_LEVEL (bh) : 0, PATH_H_POSITION (path, h)); + h ++; + } + } else { + int offset = path->path_length; + struct buffer_head * bh; + printk ("Offset Bh (b_blocknr, b_count) Position Nr_item\n"); + while ( offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + bh = PATH_OFFSET_PBUFFER (path, offset); + printk ("%6d %10p (%9lu, %7d) %8d %7d\n", offset, + bh, bh ? bh->b_blocknr : 0, bh ? 
atomic_read (&(bh->b_count)) : 0, + PATH_OFFSET_POSITION (path, offset), bh ? B_NR_ITEMS (bh) : -1); + + offset --; + } + } + +} + + +/* this prints internal nodes (4 keys/items in line) (dc_number, + dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, + dc_size)...*/ +static int print_internal (struct buffer_head * bh, int first, int last) +{ + struct key * key; + struct disk_child * dc; + int i; + int from, to; + + if (!B_IS_KEYS_LEVEL (bh)) + return 1; + + check_internal (bh); + + if (first == -1) { + from = 0; + to = B_NR_ITEMS (bh); + } else { + from = first; + to = last < B_NR_ITEMS (bh) ? last : B_NR_ITEMS (bh); + } + + reiserfs_warning ("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); + + dc = B_N_CHILD (bh, from); + reiserfs_warning ("PTR %d: %y ", from, dc); + + for (i = from, key = B_N_PDELIM_KEY (bh, from), dc ++; i < to; i ++, key ++, dc ++) { + reiserfs_warning ("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); + if (i && i % 4 == 0) + printk ("\n"); + } + printk ("\n"); + return 0; +} + + + + + +static int print_leaf (struct buffer_head * bh, int print_mode, int first, int last) +{ + struct block_head * blkh; + struct item_head * ih; + int i; + int from, to; + + if (!B_IS_ITEMS_LEVEL (bh)) + return 1; + + check_leaf (bh); + + blkh = B_BLK_HEAD (bh); + ih = B_N_PITEM_HEAD (bh,0); + + printk ("\n===================================================================\n"); + reiserfs_warning ("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); + + if (!(print_mode & PRINT_LEAF_ITEMS)) { + reiserfs_warning ("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", + &(ih->ih_key), &((ih + le16_to_cpu (blkh->blk_nr_item) - 1)->ih_key)); + return 0; + } + + if (first < 0 || first > le16_to_cpu (blkh->blk_nr_item) - 1) + from = 0; + else + from = first; + + if (last < 0 || last > le16_to_cpu (blkh->blk_nr_item)) + to = le16_to_cpu (blkh->blk_nr_item); + else + to = last; + + ih += from; + printk 
("-------------------------------------------------------------------------------\n"); + printk ("|##| type | key | ilen | free_space | version | loc |\n"); + for (i = from; i < to; i++, ih ++) { + printk ("-------------------------------------------------------------------------------\n"); + reiserfs_warning ("|%2d| %h |\n", i, ih); + if (print_mode & PRINT_LEAF_ITEMS) + op_print_item (ih, B_I_PITEM (bh, ih)); + } + + printk ("===================================================================\n"); + + return 0; +} + +static char * reiserfs_version (char * buf) +{ + __u16 * pversion; + + pversion = (__u16 *)(buf) + 36; + if (*pversion == 0) + return "0"; + if (*pversion == 2) + return "2"; + return "Unknown"; +} + + +/* return 1 if this is not super block */ +static int print_super_block (struct buffer_head * bh) +{ + struct reiserfs_super_block * rs = (struct reiserfs_super_block *)(bh->b_data); + int skipped, data_blocks; + + + if (strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, strlen ( REISERFS_SUPER_MAGIC_STRING)) && + strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, strlen ( REISER2FS_SUPER_MAGIC_STRING))) + return 1; + + printk ("%s\'s super block in block %ld\n======================\n", kdevname (bh->b_dev), bh->b_blocknr); + printk ("Reiserfs version %s\n", reiserfs_version (bh->b_data)); + printk ("Block count %u\n", le32_to_cpu (rs->s_block_count)); + printk ("Blocksize %d\n", le16_to_cpu (rs->s_blocksize)); + printk ("Free blocks %u\n", le32_to_cpu (rs->s_free_blocks)); + skipped = bh->b_blocknr; // FIXME: this would be confusing if + // someone stores reiserfs super block in some data block ;) + data_blocks = le32_to_cpu (rs->s_block_count) - skipped - 1 - + le16_to_cpu (rs->s_bmap_nr) - (le32_to_cpu (rs->s_orig_journal_size) + 1) - + le32_to_cpu (rs->s_free_blocks); + printk ("Busy blocks (skipped %d, bitmaps - %d, journal blocks - %d\n" + "1 super blocks, %d data blocks\n", + skipped, le16_to_cpu (rs->s_bmap_nr), + (le32_to_cpu 
(rs->s_orig_journal_size) + 1), data_blocks); + printk ("Root block %u\n", le32_to_cpu (rs->s_root_block)); + printk ("Journal block (first) %d\n", le32_to_cpu (rs->s_journal_block)); + printk ("Journal dev %d\n", le32_to_cpu (rs->s_journal_dev)); + printk ("Journal orig size %d\n", le32_to_cpu (rs->s_orig_journal_size)); + printk ("Filesystem state %s\n", + (le16_to_cpu (rs->s_state) == REISERFS_VALID_FS) ? "VALID" : "ERROR"); + printk ("Hash function \"%s\"\n", le16_to_cpu (rs->s_hash_function_code) == TEA_HASH ? "tea" : + ((le16_to_cpu (rs->s_hash_function_code) == YURA_HASH) ? "rupasov" : "unknown")); + +#if 0 + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ +#endif + printk ("Tree height %d\n", rs->s_tree_height); + return 0; +} + + +static int print_desc_block (struct buffer_head * bh) +{ + struct reiserfs_journal_desc * desc; + + desc = (struct reiserfs_journal_desc *)(bh->b_data); + if (memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) + return 1; + + printk ("Desc block %lu (j_trans_id %d, j_mount_id %d, j_len %d)", + bh->b_blocknr, desc->j_trans_id, desc->j_mount_id, desc->j_len); + + return 0; +} + + +void print_block (struct buffer_head * bh, ...)//int print_mode, int first, int last) +{ + va_list args; + int mode, first, last; + + va_start (args, bh); + + if ( ! 
bh ) { + printk("print_block: buffer is NULL\n"); + return; + } + + mode = va_arg (args, int); + first = va_arg (args, int); + last = va_arg (args, int); + if (print_leaf (bh, mode, first, last)) + if (print_internal (bh, first, last)) + if (print_super_block (bh)) + if (print_desc_block (bh)) + printk ("Block %ld contains unformatted data\n", bh->b_blocknr); +} + + + +char print_tb_buf[2048]; + +/* this stores initial state of tree balance in the print_tb_buf */ +void store_print_tb (struct tree_balance * tb) +{ + int h = 0; + int i; + struct buffer_head * tbSh, * tbFh; + + if (!tb) + return; + + sprintf (print_tb_buf, "\n" + "BALANCING %d\n" + "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" + "=====================================================================\n" + "* h * S * L * R * F * FL * FR * CFL * CFR *\n", + tb->tb_sb->u.reiserfs_sb.s_do_balance, + tb->tb_mode, PATH_LAST_POSITION (tb->tb_path), tb->tb_path->pos_in_item); + + for (h = 0; h < sizeof(tb->insert_size) / sizeof (tb->insert_size[0]); h ++) { + if (PATH_H_PATH_OFFSET (tb->tb_path, h) <= tb->tb_path->path_length && + PATH_H_PATH_OFFSET (tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { + tbSh = PATH_H_PBUFFER (tb->tb_path, h); + tbFh = PATH_H_PPARENT (tb->tb_path, h); + } else { + tbSh = 0; + tbFh = 0; + } + sprintf (print_tb_buf + strlen (print_tb_buf), + "* %d * %3ld(%2d) * %3ld(%2d) * %3ld(%2d) * %5ld * %5ld * %5ld * %5ld * %5ld *\n", + h, + (tbSh) ? (tbSh->b_blocknr):(-1), + (tbSh) ? atomic_read (&(tbSh->b_count)) : -1, + (tb->L[h]) ? (tb->L[h]->b_blocknr):(-1), + (tb->L[h]) ? atomic_read (&(tb->L[h]->b_count)) : -1, + (tb->R[h]) ? (tb->R[h]->b_blocknr):(-1), + (tb->R[h]) ? atomic_read (&(tb->R[h]->b_count)) : -1, + (tbFh) ? (tbFh->b_blocknr):(-1), + (tb->FL[h]) ? (tb->FL[h]->b_blocknr):(-1), + (tb->FR[h]) ? (tb->FR[h]->b_blocknr):(-1), + (tb->CFL[h]) ? (tb->CFL[h]->b_blocknr):(-1), + (tb->CFR[h]) ? 
(tb->CFR[h]->b_blocknr):(-1)); + } + + sprintf (print_tb_buf + strlen (print_tb_buf), + "=====================================================================\n" + "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" + "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", + tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],tb->rbytes, tb->blknum[0], + tb->s0num, tb->s1num,tb->s1bytes, tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], tb->rkey[0]); + + /* this prints balance parameters for non-leaf levels */ + h = 0; + do { + h++; + sprintf (print_tb_buf + strlen (print_tb_buf), + "* %d * %4d * %2d * * %2d * * %2d *\n", + h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]); + } while (tb->insert_size[h]); + + sprintf (print_tb_buf + strlen (print_tb_buf), + "=====================================================================\n" + "FEB list: "); + + /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ + h = 0; + for (i = 0; i < sizeof (tb->FEB) / sizeof (tb->FEB[0]); i ++) + sprintf (print_tb_buf + strlen (print_tb_buf), + "%p (%lu %d)%s", tb->FEB[i], tb->FEB[i] ? tb->FEB[i]->b_blocknr : 0, + tb->FEB[i] ? atomic_read (&(tb->FEB[i]->b_count)) : 0, + (i == sizeof (tb->FEB) / sizeof (tb->FEB[0]) - 1) ? 
"\n" : ", "); + + sprintf (print_tb_buf + strlen (print_tb_buf), + "======================== the end ====================================\n"); +} + +void print_cur_tb (char * mes) +{ + printk ("%s\n%s", mes, print_tb_buf); +} + + +#ifndef __KERNEL__ + +void print_bmap_block (int i, char * data, int size, int silent) +{ + int j, k; + int bits = size * 8; + int zeros = 0, ones = 0; + + + if (test_bit (0, data)) { + /* first block addressed by this bitmap block is used */ + ones ++; + if (!silent) + printf ("Busy (%d-", i * bits); + for (j = 1; j < bits; j ++) { + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Free(%d-", j - 1 + i * bits, j + i * bits); + + while (!test_bit (j, data)) { + zeros ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end: + } + } else { + /* first block addressed by this bitmap is free */ + zeros ++; + if (!silent) + printf ("Free (%d-", i * bits); + for (j = 1; j < bits; j ++) { + k = 0; + while (!test_bit (j, data)) { + k ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + zeros += k; + goto end2; + } + j++; + } + zeros += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + k = 0; + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + ones += k; + goto end2; + } + j++; + } + ones += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end2: + } + } + + printf ("used %d, free %d\n", ones, zeros); +} + + +/* if silent == 1, do not print details */ +void print_bmap (struct super_block * s, int silent) +{ + int bmapnr = SB_BMAP_NR (s); + int i; + + printf ("Bitmap blocks are:\n"); + for (i = 0; i < bmapnr; i ++) { + printf ("#%d: block 
%lu: ", i, SB_AP_BITMAP(s)[i]->b_blocknr); + print_bmap_block (i, SB_AP_BITMAP(s)[i]->b_data, s->s_blocksize, silent); + } + +} + + + + +void print_objectid_map (struct super_block * s) +{ + int i; + struct reiserfs_super_block * rs; + unsigned long * omap; + + rs = SB_DISK_SUPER_BLOCK (s); + omap = (unsigned long *)(rs + 1); + printk ("Map of objectids\n"); + + for (i = 0; i < rs->s_oid_cursize; i ++) { + if (i % 2 == 0) + printk ("busy(%lu-%lu) ", omap[i], omap[i+1] - 1); + else + printk ("free(%lu-%lu) ", + omap[i], ((i+1) == rs->s_oid_cursize) ? -1 : omap[i+1] - 1); + } + printk ("\n"); + + printk ("Object id array has size %d (max %d):", rs->s_oid_cursize, + rs->s_oid_maxsize); + + for (i = 0; i < rs->s_oid_cursize; i ++) + printk ("%lu ", omap[i]); + printk ("\n"); + +} + +#endif /* #ifndef __KERNEL__ */ + + +static void check_leaf_block_head (struct buffer_head * bh) +{ + struct block_head * blkh; + + blkh = B_BLK_HEAD (bh); + if (le16_to_cpu (blkh->blk_nr_item) > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic (0, "vs-6010: check_leaf_block_head: invalid item number %z", bh); + if (le16_to_cpu (blkh->blk_free_space) > + bh->b_size - BLKH_SIZE - IH_SIZE * le16_to_cpu (blkh->blk_nr_item)) + reiserfs_panic (0, "vs-6020: check_leaf_block_head: invalid free space %z", bh); + +} + +static void check_internal_block_head (struct buffer_head * bh) +{ + struct block_head * blkh; + + blkh = B_BLK_HEAD (bh); + if (!(B_LEVEL (bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL (bh) <= MAX_HEIGHT)) + reiserfs_panic (0, "vs-6025: check_internal_block_head: invalid level %z", bh); + + if (B_NR_ITEMS (bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic (0, "vs-6030: check_internal_block_head: invalid item number %z", bh); + + if (B_FREE_SPACE (bh) != + bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS (bh) - DC_SIZE * (B_NR_ITEMS (bh) + 1)) + reiserfs_panic (0, "vs-6040: check_internal_block_head: invalid free space %z", bh); + +} + + +void check_leaf (struct buffer_head * bh) 
+{ + int i; + struct item_head * ih; + + if (!bh) + return; + check_leaf_block_head (bh); + for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) + op_check_item (ih, B_I_PITEM (bh, ih)); +} + + +void check_internal (struct buffer_head * bh) +{ + if (!bh) + return; + check_internal_block_head (bh); +} + + +void print_statistics (struct super_block * s) +{ + + /* + printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, preserve list freeings %d, \ +bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", + s->u.reiserfs_sb.s_do_balance, s->u.reiserfs_sb.s_fix_nodes, s->u.reiserfs_sb.s_preserve_list_freeings, + s->u.reiserfs_sb.s_bmaps, s->u.reiserfs_sb.s_bmaps_without_search, + s->u.reiserfs_sb.s_direct2indirect, s->u.reiserfs_sb.s_indirect2direct); + */ + +} diff -u -r --new-file linux/fs/reiserfs/resize.c v2.4.0-test8/linux/fs/reiserfs/resize.c --- linux/fs/reiserfs/resize.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/resize.c Mon Sep 11 05:21:51 2000 @@ -0,0 +1,178 @@ +/* Copyright 1999 Hans Reiser, see README file for licensing details. + * + * Written by Alexander Zarochentcev. + * + * The kernel part of the (on-line) reiserfs resizer. 
+ */ + +#ifdef __KERNEL__ + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <linux/reiserfs_fs.h> +#include <linux/reiserfs_fs_sb.h> + +#else + +#include "nokernel.h" + +#endif + +int reiserfs_resize (struct super_block * s, unsigned long block_count_new) +{ + struct reiserfs_super_block * sb; + struct buffer_head ** bitmap, * bh; + struct reiserfs_transaction_handle th; + unsigned int bmap_nr_new, bmap_nr; + unsigned int block_r_new, block_r; + + struct reiserfs_list_bitmap * jb; + struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; + + unsigned long int block_count, free_blocks; + int i; + int copy_size ; + + sb = SB_DISK_SUPER_BLOCK(s); + + if (SB_BLOCK_COUNT(s) >= block_count_new) { + printk("can\'t shrink filesystem on-line\n"); + return 1; + } + + /* check the device size */ + bh = bread(s->s_dev, block_count_new - 1, s->s_blocksize); + if (!bh) { + printk("reiserfs_resize: can\'t read last block\n"); + return 1; + } + brelse(bh); + + /* old disk layout detection; those partitions can be mounted, but + * cannot be resized */ + if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size + != REISERFS_DISK_OFFSET_IN_BYTES ) { + printk("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); + return 1; + } + + + /* count used bits in last bitmap block */ + block_r = SB_BLOCK_COUNT(s) - + (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; + + /* count bitmap blocks in new fs */ + bmap_nr_new = block_count_new / ( s->s_blocksize * 8 ); + block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; + if (block_r_new) + bmap_nr_new++; + else + block_r_new = s->s_blocksize * 8; + + /* save old values */ + block_count = SB_BLOCK_COUNT(s); + bmap_nr = SB_BMAP_NR(s); + + /* reallocate journal bitmaps */ + if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { + printk("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); + 
unlock_super(s) ; + return 1 ; + } + /* the new journal bitmaps are zero filled, now we copy in the bitmap + ** node pointers from the old journal bitmap structs, and then + ** transfer the new data structures into the journal struct. + ** + ** using the copy_size var below allows this code to work for + ** both shrinking and expanding the FS. + */ + copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr ; + copy_size = copy_size * sizeof(struct reiserfs_list_bitmap_node *) ; + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + struct reiserfs_bitmap_node **node_tmp ; + jb = SB_JOURNAL(s)->j_list_bitmap + i ; + memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size) ; + + /* just in case vfree schedules on us, copy the new + ** pointer into the journal struct before freeing the + ** old one + */ + node_tmp = jb->bitmaps ; + jb->bitmaps = jbitmap[i].bitmaps ; + vfree(node_tmp) ; + } + + /* allocate additional bitmap blocks, reallocate array of bitmap + * block pointers */ + if (bmap_nr_new > bmap_nr) { + bitmap = reiserfs_kmalloc(sizeof(struct buffer_head *) * bmap_nr_new, + GFP_KERNEL, s); + if (!bitmap) { + printk("reiserfs_resize: unable to allocate memory.\n"); + return 1; + } + for (i = 0; i < bmap_nr; i++) + bitmap[i] = SB_AP_BITMAP(s)[i]; + for (i = bmap_nr; i < bmap_nr_new; i++) { + bitmap[i] = reiserfs_getblk(s->s_dev, i * s->s_blocksize * 8, s->s_blocksize); + if(!bitmap[i]) { + printk("reiserfs_resize: getblk() failed"); + while (--i >= bmap_nr) + brelse(bitmap[i]); + reiserfs_kfree(bitmap, + sizeof(struct buffer_head *) * bmap_nr_new, s); + return 1; + } + memset(bitmap[i]->b_data, 0, sb->s_blocksize); + reiserfs_test_and_set_le_bit(0, bitmap[i]->b_data); + + mark_buffer_dirty(bitmap[i]) ; + mark_buffer_uptodate(bitmap[i], 1); + ll_rw_block(WRITE, 1, bitmap + i); + wait_on_buffer(bitmap[i]); + } + /* free old bitmap blocks array */ + reiserfs_kfree(SB_AP_BITMAP(s), + sizeof(struct buffer_head *) * bmap_nr, s); + SB_AP_BITMAP(s) = bitmap; + } + + 
unlock_super(s) ; /* deadlock avoidance */ + /* begin transaction */ + journal_begin(&th, s, 10); + lock_super(s) ; /* must keep super locked during these ops */ + + /* correct last bitmap blocks in old and new disk layout */ + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1], 1); + for (i = block_r; i < s->s_blocksize * 8; i++) + reiserfs_test_and_clear_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr - 1]->b_data); + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1]); + + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1], 1); + for (i = block_r_new; i < s->s_blocksize * 8; i++) + reiserfs_test_and_set_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr_new - 1]->b_data); + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1]); + + /* update super */ + free_blocks = SB_FREE_BLOCKS(s); + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + sb->s_free_blocks = cpu_to_le32(free_blocks + (block_count_new + - block_count - (bmap_nr_new - bmap_nr))); + sb->s_block_count = cpu_to_le32(block_count_new); + sb->s_bmap_nr = cpu_to_le16(bmap_nr_new); + s->s_dirt = 1; + + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + + SB_JOURNAL(s)->j_must_wait = 1; + unlock_super(s) ; /* see comments in reiserfs_put_super() */ + journal_end(&th, s, 10); + lock_super(s); + + return 0; +} + diff -u -r --new-file linux/fs/reiserfs/stree.c v2.4.0-test8/linux/fs/reiserfs/stree.c --- linux/fs/reiserfs/stree.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/stree.c Thu Sep 21 12:25:39 2000 @@ -0,0 +1,2078 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +/* + * Written by Anatoly P. 
Pinchuk pap@namesys.botik.ru + * Programm System Institute + * Pereslavl-Zalessky Russia + */ + +/* + * This file contains functions dealing with S+tree + * + * B_IS_IN_TREE + * copy_short_key + * copy_item_head + * comp_short_keys + * comp_keys + * comp_cpu_keys + * comp_short_le_keys + * comp_short_cpu_keys + * cpu_key2cpu_key + * le_key2cpu_key + * comp_le_keys + * bin_search + * get_lkey + * get_rkey + * key_in_buffer + * decrement_bcount + * decrement_counters_in_path + * reiserfs_check_path + * pathrelse_and_restore + * pathrelse + * search_by_key_reada + * search_by_key + * search_for_position_by_key + * comp_items + * prepare_for_direct_item + * prepare_for_direntry_item + * prepare_for_delete_or_cut + * calc_deleted_bytes_number + * init_tb_struct + * padd_item + * reiserfs_delete_item + * reiserfs_delete_solid_item + * reiserfs_delete_object + * maybe_indirect_to_direct + * indirect_to_direct_roll_back + * reiserfs_cut_from_item + * truncate_directory + * reiserfs_do_truncate + * reiserfs_paste_into_item + * reiserfs_insert_item + */ +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/pagemap.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> + +#else + +#include "nokernel.h" + +#endif + + + +/* Does the buffer contain a disk block which is in the tree. */ +inline int B_IS_IN_TREE (struct buffer_head * p_s_bh) +{ + +#ifdef CONFIG_REISERFS_CHECK + + if ( B_LEVEL (p_s_bh) > MAX_HEIGHT ) { + reiserfs_panic(0, "PAP-1010: B_IS_IN_TREE: block (%b) has too big level (%z)", + p_s_bh, p_s_bh); + } +#endif + + return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); +} + + + + +inline void copy_short_key (void * to, void * from) +{ + memcpy (to, from, SHORT_KEY_SIZE); +} + +// +// to gets item head in le form +// +inline void copy_item_head(void * p_v_to, void * p_v_from) +{ + memcpy (p_v_to, p_v_from, IH_SIZE); +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. 
k2 is pointer to cpu variable. For key of items of the same + object this returns 0. + Returns: -1 if key1 < key2 + 0 if key1 == key2 + 1 if key1 > key2 */ +inline int comp_short_keys (struct key * le_key, struct cpu_key * cpu_key) +{ + __u32 * p_s_le_u32, * p_s_cpu_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_le_u32 = (__u32 *)le_key; + p_s_cpu_u32 = (__u32 *)cpu_key; + for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) { + if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 ) + return -1; + if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 ) + return 1; + } + + return 0; +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. + Compare keys using all 4 key fields. + Returns: -1 if key1 < key2 0 + if key1 = key2 1 if key1 > key2 */ +inline int comp_keys (struct key * le_key, struct cpu_key * cpu_key) +{ + int retval; + + retval = comp_short_keys (le_key, cpu_key); + if (retval) + return retval; + if (le_key_k_offset (cpu_key->version, le_key) < cpu_key_k_offset (cpu_key)) + return -1; + if (le_key_k_offset (cpu_key->version, le_key) > cpu_key_k_offset (cpu_key)) + return 1; + + if (cpu_key->key_length == 3) + return 0; + + /* this part is needed only when tail conversion is in progress */ + if (le_key_k_type (cpu_key->version, le_key) < cpu_key_k_type (cpu_key)) + return -1; + + if (le_key_k_type (cpu_key->version, le_key) > cpu_key_k_type (cpu_key)) + return 1; + + return 0; +} + + +// +// FIXME: not used yet +// +inline int comp_cpu_keys (struct cpu_key * key1, struct cpu_key * key2) +{ + if (key1->on_disk_key.k_dir_id < key2->on_disk_key.k_dir_id) + return -1; + if (key1->on_disk_key.k_dir_id > key2->on_disk_key.k_dir_id) + return 1; + + if (key1->on_disk_key.k_objectid < key2->on_disk_key.k_objectid) + return -1; + if (key1->on_disk_key.k_objectid > key2->on_disk_key.k_objectid) + return 1; + + if (cpu_key_k_offset (key1) < cpu_key_k_offset (key2)) + return -1; + if (cpu_key_k_offset (key1) > 
cpu_key_k_offset (key2)) + return 1; + + reiserfs_warning ("comp_cpu_keys: type are compared for %k and %k\n", + key1, key2); + + if (cpu_key_k_type (key1) < cpu_key_k_type (key2)) + return -1; + if (cpu_key_k_type (key1) > cpu_key_k_type (key2)) + return 1; + return 0; +} + +inline int comp_short_le_keys (struct key * key1, struct key * key2) +{ + __u32 * p_s_1_u32, * p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *)key1; + p_s_2_u32 = (__u32 *)key2; + for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { + if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) + return -1; + if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) ) + return 1; + } + return 0; +} + +inline int comp_short_cpu_keys (struct cpu_key * key1, + struct cpu_key * key2) +{ + __u32 * p_s_1_u32, * p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *)key1; + p_s_2_u32 = (__u32 *)key2; + + for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { + if ( *p_s_1_u32 < *p_s_2_u32 ) + return -1; + if ( *p_s_1_u32 > *p_s_2_u32 ) + return 1; + } + return 0; +} + + + +inline void cpu_key2cpu_key (struct cpu_key * to, struct cpu_key * from) +{ + memcpy (to, from, sizeof (struct cpu_key)); +} + + +inline void le_key2cpu_key (struct cpu_key * to, struct key * from) +{ + to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); + to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); + + // find out version of the key + to->version = le_key_version (from); + if (to->version == ITEM_VERSION_1) { + to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset); + to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness); + } else { + to->on_disk_key.u.k_offset_v2.k_offset = le64_to_cpu (from->u.k_offset_v2.k_offset); + to->on_disk_key.u.k_offset_v2.k_type = le16_to_cpu (from->u.k_offset_v2.k_type); + } +} + + + +// this does not say which one is bigger, it only returns 1 if keys +// are not 
equal, 0 otherwise +inline int comp_le_keys (struct key * k1, struct key * k2) +{ + return memcmp (k1, k2, sizeof (struct key)); +} + +/************************************************************************** + * Binary search toolkit function * + * Search for an item in the array by the item key * + * Returns: 1 if found, 0 if not found; * + * *p_n_pos = number of the searched element if found, else the * + * number of the first element that is larger than p_v_key. * + **************************************************************************/ +/* For those not familiar with binary search: n_lbound is the leftmost item that it + could be, n_rbound the rightmost item that it could be. We examine the item + halfway between n_lbound and n_rbound, and that tells us either that we can increase + n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound > n_rbound that + there are no possible items, and we have not found it. With each examination we + cut the number of possible items it could be by one more than half rounded down, + or we find it. */ +inline int bin_search ( + void * p_v_key, /* Key to search for. */ + void * p_v_base, /* First item in the array. */ + int p_n_num, /* Number of items in the array. */ + int p_n_width, /* Item size in the array. + searched. Lest the reader be + confused, note that this is crafted + as a general function, and when it + is applied specifically to the array + of item headers in a node, p_n_width + is actually the item header size not + the item size. */ + int * p_n_pos /* Number of the searched for element. 
*/ + ) { + int n_rbound, n_lbound, n_j; + + for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 ) + switch( COMP_KEYS((struct key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) { + case -1: n_lbound = n_j + 1; continue; + case 1: n_rbound = n_j - 1; continue; + case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */ + } + + /* bin_search did not find given key, it returns position of key, + that is minimal and greater than the given one. */ + *p_n_pos = n_lbound; + return ITEM_NOT_FOUND; +} + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + + + +/* Minimal possible key. It is never in the tree. */ +struct key MIN_KEY = {0, 0, {{0, 0},}}; + +/* Maximal possible key. It is never in the tree. */ +struct key MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}}; + + +/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom + of the path, and going upwards. We must check the path's validity at each step. If the key is not in + the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this + case we return a special key, either MIN_KEY or MAX_KEY. */ +inline struct key * get_lkey ( + struct path * p_s_chk_path, + struct super_block * p_s_sb + ) { + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head * p_s_parent; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_sb,"PAP-5010: get_lkey: illegal offset in the path"); +#endif + + /* While not higher in path than first element. */ + while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + reiserfs_panic(p_s_sb, "PAP-5020: get_lkey: parent is not uptodate"); +#endif + + /* Parent at the path is not in the tree now. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + return &MAX_KEY; + /* Check whether position in the parent is correct. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) + return &MAX_KEY; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) + return &MAX_KEY; + /* Return delimiting key if position in the parent is not equal to zero. */ + if ( n_position ) + return B_N_PDELIM_KEY(p_s_parent, n_position - 1); + } + /* Return MIN_KEY if we are in the root of the buffer tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_sb) ) + return &MIN_KEY; + return &MAX_KEY; +} + + +/* Get delimiting key of the buffer at the path and its right neighbor. */ +inline struct key * get_rkey ( + struct path * p_s_chk_path, + struct super_block * p_s_sb + ) { + int n_position, + n_path_offset = p_s_chk_path->path_length; + struct buffer_head * p_s_parent; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_sb,"PAP-5030: get_rkey: illegal offset in the path"); +#endif + + while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + reiserfs_panic(p_s_sb, "PAP-5040: get_rkey: parent is not uptodate"); +#endif + + /* Parent at the path is not in the tree now. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + return &MIN_KEY; + /* Check whether position in the parrent is correct. 
*/ + if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) + return &MIN_KEY; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) + return &MIN_KEY; + /* Return delimiting key if position in the parent is not the last one. */ + if ( n_position != B_NR_ITEMS(p_s_parent) ) + return B_N_PDELIM_KEY(p_s_parent, n_position); + } + /* Return MAX_KEY if we are in the root of the buffer tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_sb) ) + return &MAX_KEY; + return &MIN_KEY; +} + + +/* Check whether a key is contained in the tree rooted from a buffer at a path. */ +/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in + the path. These delimiting keys are stored at least one level above that buffer in the tree. If the + buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in + this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ +static inline int key_in_buffer ( + struct path * p_s_chk_path, /* Path which should be checked. */ + struct cpu_key * p_s_key, /* Key which should be checked. */ + struct super_block * p_s_sb /* Super block pointer. */ + ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET || + p_s_chk_path->path_length > MAX_HEIGHT ) + reiserfs_panic(p_s_sb, "PAP-5050: key_in_buffer: pointer to the key(%p) is NULL or illegal path length(%d)", + p_s_key, p_s_chk_path->path_length); + + if ( PATH_PLAST_BUFFER(p_s_chk_path)->b_dev == NODEV ) + reiserfs_panic(p_s_sb, "PAP-5060: key_in_buffer: device must not be NODEV"); +#endif + + if ( COMP_KEYS(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 ) + /* left delimiting key is bigger than the key we look for */ + return 0; + // if ( COMP_KEYS(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) + if ( COMP_KEYS(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 ) + /* p_s_key must be less than right delimiting key */ + return 0; + return 1; +} + + +inline void decrement_bcount( + struct buffer_head * p_s_bh + ) { + if ( p_s_bh ) { + if ( atomic_read (&(p_s_bh->b_count)) ) { + atomic_dec (&(p_s_bh->b_count)); + return; + } + reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh); + } +} + + +/* Decrement b_count field of all the buffers in the path. 
*/ +void decrement_counters_in_path ( + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || + n_path_offset > EXTENDED_MAX_HEIGHT - 1 ) + reiserfs_panic(NULL, "PAP-5080: decrement_counters_in_path: illegal path offset of %d", n_path_offset); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + struct buffer_head * bh; + + bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); + decrement_bcount (bh); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + + +int reiserfs_check_path(struct path *p) { +#ifdef CONFIG_REISERFS_CHECK + if (p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET) { + reiserfs_warning("check_path, path not properly relsed\n") ; + BUG() ; + } +#endif + return 0 ; +} + + +/* Release all buffers in the path. Restore dirty bits clean +** when preparing the buffer for the log +** +** only called from fix_nodes() +*/ +void pathrelse_and_restore ( + struct super_block *s, + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ) + reiserfs_panic(NULL, "clm-4000: pathrelse: illegal path offset"); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, + n_path_offset)); + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + +/* Release all buffers in the path. 
*/ +void pathrelse ( + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ) + reiserfs_panic(NULL, "PAP-5090: pathrelse: illegal path offset"); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + + + +static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) != DISK_LEAF_NODE_LEVEL) { + printk ("is_leaf: this should be caught earlier\n"); + return 0; + } + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { + /* item number is too big or too small */ + reiserfs_warning ("is_leaf: nr_item seems wrong: %z\n", bh); + return 0; + } + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) { + /* free space does not match to calculated amount of use space */ + reiserfs_warning ("is_leaf: free space seems wrong: %z\n", bh); + return 0; + } + + // FIXME: it is_leaf will hit performance too much - we may have + // return 1 here + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i ++, ih ++) { + if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { + reiserfs_warning ("is_leaf: item location seems wrong: %h\n", ih); + return 0; + } + if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { + reiserfs_warning ("is_leaf: item length seems wrong: %h\n", ih); + return 0; + } + if (prev_location - 
ih_location (ih) != ih_item_len (ih)) { + reiserfs_warning ("is_leaf: item location seems wrong (second one): %h\n", ih); + return 0; + } + prev_location = ih_location (ih); + } + + // one may imagine much more checks + return 1; +} + + +/* returns 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) <= DISK_LEAF_NODE_LEVEL || + le16_to_cpu (blkh->blk_level) > MAX_HEIGHT) { + /* this level is not possible for internal nodes */ + printk ("is_internal: this should be caught earlier\n"); + return 0; + } + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { + /* for internal which is not root we might check min number of keys */ + reiserfs_warning ("is_internal: number of key seems wrong: %z\n", bh); + return 0; + } + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) { + reiserfs_warning ("is_internal: free space seems wrong: %z\n", bh); + return 0; + } + + // one may imagine much more checks + return 1; +} + + +// make sure that bh contains formatted node of reiserfs tree of +// 'level'-th level +static int is_tree_node (struct buffer_head * bh, int level) +{ + if (B_LEVEL (bh) != level) { + printk ("is_tree_node: node level %d does not match to the expected one %d\n", + B_LEVEL (bh), level); + return 0; + } + if (level == DISK_LEAF_NODE_LEVEL) + return is_leaf (bh->b_data, bh->b_size, bh); + + return is_internal (bh->b_data, bh->b_size, bh); +} + + + +#ifdef SEARCH_BY_KEY_READA + +/* The function is NOT SCHEDULE-SAFE! 
*/ +static void search_by_key_reada (struct super_block * s, int blocknr) +{ + struct buffer_head * bh; + + if (blocknr == 0) + return; + + bh = reiserfs_getblk (s->s_dev, blocknr, s->s_blocksize); + + if (!buffer_uptodate (bh)) { + ll_rw_block (READA, 1, &bh); + } + bh->b_count --; +} + +#endif + +/************************************************************************** + * Algorithm SearchByKey * + * look for item in the Disk S+Tree by its key * + * Input: p_s_sb - super block * + * p_s_key - pointer to the key to search * + * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * + * p_s_search_path - path from the root to the needed leaf * + **************************************************************************/ + +/* This function fills up the path from the root to the leaf as it + descends the tree looking for the key. It uses reiserfs_bread to + try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each + node search_by_key finds using reiserfs_bread it then uses + bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking + through an internal node. If it is looking through a leaf node + bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given + key. search_by_key returns a path that must be checked for the + correctness of the top of the path but need not be checked for the + correctness of the bottom of the path */ +/* The function is NOT SCHEDULE-SAFE! */ +int search_by_key (struct super_block * p_s_sb, + struct cpu_key * p_s_key, /* Key to search. */ + struct path * p_s_search_path, /* This structure was + allocated and initialized + by the calling + function. It is filled up + by this function. */ + int n_stop_level /* How far down the tree to search. 
To + stop at leaf level - set to + DISK_LEAF_NODE_LEVEL */ + ) { + kdev_t n_dev = p_s_sb->s_dev; + int n_block_number = SB_ROOT_BLOCK (p_s_sb), + expected_level = SB_TREE_HEIGHT (p_s_sb), + n_block_size = p_s_sb->s_blocksize; + struct buffer_head * p_s_bh; + struct path_element * p_s_last_element; + int n_node_level, n_retval; + int right_neighbor_of_leaf_node; + int fs_gen; + +#ifdef CONFIG_REISERFS_CHECK + int n_repeat_counter = 0; +#endif + + /* As we add each node to a path we increase its count. This means that + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ + + decrement_counters_in_path(p_s_search_path); + + right_neighbor_of_leaf_node = 0; + + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ + while ( 1 ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( !(++n_repeat_counter % 50000) ) + reiserfs_warning ("PAP-5100: search_by_key: %s:" + "there were %d iterations of while loop " + "looking for key %K\n", + current->comm, n_repeat_counter, p_s_key); +#endif + + /* prep path to have another element added to it. */ + p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length); + fs_gen = get_generation (p_s_sb); + expected_level --; + +#ifdef SEARCH_BY_KEY_READA + /* schedule read of right neighbor */ + search_by_key_reada (p_s_sb, right_neighbor_of_leaf_node); +#endif + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. */ + if ( ! (p_s_bh = p_s_last_element->pe_buffer = + reiserfs_bread(n_dev, n_block_number, n_block_size)) ) { + p_s_search_path->path_length --; + pathrelse(p_s_search_path); + return IO_ERROR; + } + + /* It is possible that schedule occured. We must check whether the key + to search is still in the tree rooted from the current buffer. 
If + not then repeat search from the root. */ + if ( fs_changed (fs_gen, p_s_sb) && + (!B_IS_IN_TREE (p_s_bh) || !key_in_buffer(p_s_search_path, p_s_key, p_s_sb)) ) { + decrement_counters_in_path(p_s_search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ + n_block_number = SB_ROOT_BLOCK (p_s_sb); + expected_level = SB_TREE_HEIGHT (p_s_sb); + right_neighbor_of_leaf_node = 0; + + /* repeat search from the root */ + continue; + } + +#ifdef CONFIG_REISERFS_CHECK + + if ( ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb) ) + reiserfs_panic(p_s_sb, "PAP-5130: search_by_key: key is not in the buffer"); + if ( cur_tb ) { + print_cur_tb ("5140"); + reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!"); + } + +#endif + + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node (p_s_bh, expected_level)) { + reiserfs_warning ("vs-5150: search_by_key: " + "invalid format found in block %d. Fsck?\n", p_s_bh->b_blocknr); + pathrelse (p_s_search_path); + return IO_ERROR; + } + + /* ok, we have acquired next formatted node in the tree */ + n_node_level = B_LEVEL (p_s_bh); + +#ifdef CONFIG_REISERFS_CHECK + + if (n_node_level < n_stop_level) + reiserfs_panic (p_s_sb, "vs-5152: search_by_key: tree level is less than stop level (%d)", + n_node_level, n_stop_level); + +#endif + + n_retval = bin_search (p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), B_NR_ITEMS(p_s_bh), + ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE, &(p_s_last_element->pe_position)); + if (n_node_level == n_stop_level) { + return n_retval; + } + + /* we are not in the stop level */ + if (n_retval == ITEM_FOUND) + /* item has been found, so we choose the pointer which is to the right of the found one */ + p_s_last_element->pe_position++; + + /* if item was not found we choose the position which is to + the left of the found item. 
This requires no code, + bin_search did it already.*/ + + /* So we have chosen a position in the current node which is + an internal node. Now we calculate child block number by + position in the node. */ + n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); + +#ifdef SEARCH_BY_KEY_READA + /* if we are going to read leaf node, then calculate its right neighbor if possible */ + if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh)) + right_neighbor_of_leaf_node = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position + 1); +#endif + } +} + + +/* Form the path to an item and position in this item which contains + file byte defined by p_s_key. If there is no such item + corresponding to the key, we point the path to the item with + maximal key less than p_s_key, and *p_n_pos_in_item is set to one + past the last entry/byte in the item. If searching for entry in a + directory item, and it is not found, *p_n_pos_in_item is set to one + entry more than the entry with maximal key which is less than the + sought key. + + Note that if there is no entry in this same node which is one more, + then we point to an imaginary entry. for direct items, the + position is in units of bytes, for indirect items the position is + in units of blocknr entries, for directory items the position is in + units of directory entries. */ + +/* The function is NOT SCHEDULE-SAFE! */ +int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */ + struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */ + struct path * p_s_search_path /* Filled up by this function. */ + ) { + struct item_head * p_le_ih; /* pointer to on-disk structure */ + int n_blk_size; + loff_t item_offset, offset; + struct reiserfs_dir_entry de; + int retval; + + /* If searching for directory entry. 
*/ + if ( is_direntry_cpu_key (p_cpu_key) ) + return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de); + + /* If not searching for directory entry. */ + + /* If item is found. */ + retval = search_item (p_s_sb, p_cpu_key, p_s_search_path); + if (retval == IO_ERROR) + return retval; + if ( retval == ITEM_FOUND ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), + PATH_LAST_POSITION(p_s_search_path))->ih_item_len ) + reiserfs_panic(p_s_sb, "PAP-5165: search_for_position_by_key: item length equals zero"); +#endif + + pos_in_item(p_s_search_path) = 0; + return POSITION_FOUND; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( ! PATH_LAST_POSITION(p_s_search_path) ) + reiserfs_panic(p_s_sb, "PAP-5170: search_for_position_by_key: position equals zero"); +#endif + + /* Item is not found. Set path to the previous item. */ + p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path)); + n_blk_size = p_s_sb->s_blocksize; + + if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) { + return FILE_NOT_FOUND; + } + +#if 0 +/*#ifdef CONFIG_REISERFS_CHECK*/ + + /* we expect to find stat data or item of the same type */ + if ( ! is_statdata_le_ih(p_le_ih) && ((is_indirect_cpu_key(p_cpu_key) && ! is_indirect_le_ih(p_le_ih)) || + (is_direct_cpu_key(p_cpu_key) && ! 
is_direct_le_ih(p_le_ih))) ) { + print_block (PATH_PLAST_BUFFER(p_s_search_path), PRINT_LEAF_ITEMS, + PATH_LAST_POSITION (p_s_search_path) - 2, + PATH_LAST_POSITION (p_s_search_path) + 2); + reiserfs_panic(p_s_sb, "PAP-5190: search_for_position_by_key: " + "found item %h type does not match to the expected one %k", + p_le_ih, p_cpu_key); + } +/*#endif*/ +#endif + + // FIXME: quite ugly this far + + item_offset = le_ih_k_offset (p_le_ih); + offset = cpu_key_k_offset (p_cpu_key); + + /* Needed byte is contained in the item pointed to by the path.*/ + if (item_offset <= offset && + item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) { + pos_in_item (p_s_search_path) = offset - item_offset; + if ( is_indirect_le_ih(p_le_ih) ) { + pos_in_item (p_s_search_path) /= n_blk_size; + } + return POSITION_FOUND; + } + + /* Needed byte is not contained in the item pointed to by the + path. Set pos_in_item out of the item. */ + if ( is_indirect_le_ih (p_le_ih) ) + pos_in_item (p_s_search_path) = le16_to_cpu (p_le_ih->ih_item_len) / UNFM_P_SIZE; + else + pos_in_item (p_s_search_path) = le16_to_cpu (p_le_ih->ih_item_len); + + return POSITION_NOT_FOUND; +} + + +/* Compare given item and item pointed to by the path. */ +int comp_items (struct item_head * stored_ih, struct path * p_s_path) +{ + struct buffer_head * p_s_bh; + struct item_head * ih; + + /* Last buffer at the path is not in the tree. */ + if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) + return 1; + + /* Last path position is invalid. */ + if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) + return 1; + + /* we need only to know, whether it is the same item */ + ih = get_ih (p_s_path); + return memcmp (stored_ih, ih, IH_SIZE); + +#if 0 + /* Get item at the path. */ + p_s_path_item = PATH_PITEM_HEAD(p_s_path); + /* Compare keys. */ + if ( COMP_KEYS(&(p_s_path_item->ih_key), &(p_cpu_ih->ih_key)) ) + return 1; + + /* Compare other items fields. 
*/ + if ( le16_to_cpu (p_s_path_item->u.ih_entry_count) != p_cpu_ih->u.ih_entry_count || + le16_to_cpu (p_s_path_item->ih_item_len) != p_cpu_ih->ih_item_len || + le16_to_cpu ( p_s_path_item->ih_item_location) != p_cpu_ih->ih_item_location ) + return 1; + + /* Items are equal. */ + return 0; +#endif +} + + +/* unformatted nodes are not logged anymore, ever. This is safe +** now +*/ +#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) + +// block can not be forgotten as it is in I/O or held by someone +#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) + + + +// prepare for delete or cut of direct item +static inline int prepare_for_direct_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + loff_t round_len; + + + if ( new_file_length == max_reiserfs_offset (inode) ) { + /* item has to be deleted */ + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; + } + + // new file gets truncated + if (inode_items_version (inode) == ITEM_VERSION_2) { + // + round_len = ROUND_UP (new_file_length); + /* this was n_new_file_length < le_ih ... */ + if ( round_len < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. */ + pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); + *cut_size = -(le16_to_cpu (le_ih->ih_item_len) - pos_in_item(path)); + + return M_CUT; /* Cut from this item. */ + } + + + // old file: items may have any length + + if ( new_file_length < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. 
*/ + *cut_size = -(le16_to_cpu (le_ih->ih_item_len) - + (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih))); + return M_CUT; /* Cut from this item. */ +} + + +static inline int prepare_for_direntry_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + if (le_ih_k_offset (le_ih) == DOT_OFFSET && + new_file_length == max_reiserfs_offset (inode)) { +#ifdef CONFIG_REISERFS_CHECK + if (ih_entry_count (le_ih) != 2) + reiserfs_panic(inode->i_sb,"PAP-5220: prepare_for_delete_or_cut: " + "incorrect empty directory item (%h)", le_ih); +#endif + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ + } + + if ( ih_entry_count (le_ih) == 1 ) { + /* Delete the directory item such as there is one record only + in this item*/ + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; + } + + /* Cut one record from the directory item. */ + *cut_size = -(DEH_SIZE + entry_length (get_bh (path), le_ih, pos_in_item (path))); + return M_CUT; +} + + +/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. + If the path points to an indirect item, remove some number of its unformatted nodes. + In case of file truncate calculate whether this item must be deleted/truncated or last + unformatted node of this item will be converted to a direct item. + This function returns a determination of what balance mode the calling function should employ. */ +static char prepare_for_delete_or_cut( + struct reiserfs_transaction_handle *th, + struct inode * inode, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + int * p_n_removed, /* Number of unformatted nodes which were removed + from end of the file. */ + int * p_n_cut_size, + unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. 
*/ + ) { + struct super_block * p_s_sb = inode->i_sb; + struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path); + struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path); + +#ifdef CONFIG_REISERFS_CHECK + int n_repeat_counter = 0; +#endif + + /* Stat_data item. */ + if ( is_statdata_le_ih (p_le_ih) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( n_new_file_length != max_reiserfs_offset (inode) ) + reiserfs_panic(p_s_sb, "PAP-5210: prepare_for_delete_or_cut: mode must be M_DELETE"); +#endif + + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (p_le_ih->ih_item_len)); + return M_DELETE; + } + + + /* Directory item. */ + if ( is_direntry_le_ih (p_le_ih) ) + return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + /* Direct item. */ + if ( is_direct_le_ih (p_le_ih) ) + return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + + /* Case of an indirect item. */ + { + int n_unfm_number, /* Number of the item unformatted nodes. */ + n_counter, + n_retry, /* Set to one if there is unformatted node buffer in use. */ + n_blk_size; + __u32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ + __u32 tmp; + struct item_head s_ih; /* Item header. */ + char c_mode; /* Returned mode of the balance. */ + struct buffer_head * p_s_un_bh; + int need_research; + + + n_blk_size = p_s_sb->s_blocksize; + + /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ + do { + need_research = 0; + p_s_bh = PATH_PLAST_BUFFER(p_s_path); + /* Copy indirect item header to a temp variable. */ + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + /* Calculate number of unformatted nodes in this item. */ + n_unfm_number = I_UNFM_NUM(&s_ih); + +#ifdef CONFIG_REISERFS_CHECK + if ( ! is_indirect_le_ih(&s_ih) || ! 
n_unfm_number || + pos_in_item (p_s_path) + 1 != n_unfm_number ) { + printk("n_unfm_number = %d *p_n_pos_in_item = %d\n",n_unfm_number, pos_in_item (p_s_path)); + reiserfs_panic(p_s_sb, "PAP-5240: prepare_for_delete_or_cut: illegal item %h", &s_ih); + } +#endif + + /* Calculate balance mode and position in the item to remove unformatted nodes. */ + if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */ + pos_in_item (p_s_path) = 0; + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (s_ih.ih_item_len)); + c_mode = M_DELETE; + } + else { /* Case of truncate. */ + if ( n_new_file_length < le_ih_k_offset (&s_ih) ) { + pos_in_item (p_s_path) = 0; + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (s_ih.ih_item_len)); + c_mode = M_DELETE; /* Delete this item. */ + } + else { + /* indirect item must be truncated starting from *p_n_pos_in_item-th position */ + pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item (p_s_path) > n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5250: prepare_for_delete_or_cut: illegal position in the item"); +#endif + + /* Either convert last unformatted node of indirect item to direct item or increase + its free space. */ + if ( pos_in_item (p_s_path) == n_unfm_number ) { + *p_n_cut_size = 0; /* Nothing to cut. */ + return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */ + } + /* Calculate size to cut. */ + *p_n_cut_size = -(s_ih.ih_item_len - pos_in_item (p_s_path) * UNFM_P_SIZE); + + c_mode = M_CUT; /* Cut from this indirect item. */ + } + } + +#ifdef CONFIG_REISERFS_CHECK + if ( n_unfm_number <= pos_in_item (p_s_path) ) + reiserfs_panic(p_s_sb, "PAP-5260: prepare_for_delete_or_cut: illegal position in the indirect item"); +#endif + + /* pointers to be cut */ + n_unfm_number -= pos_in_item (p_s_path); + /* Set pointer to the last unformatted node pointer that is to be cut. 
*/ + p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; + + + /* We go through the unformatted nodes pointers of the indirect + item and look for the unformatted nodes in the cache. If we + found some of them we free it, zero corresponding indirect item + entry and log buffer containing that indirect item. For this we + need to prepare last path element for logging. If some + unformatted node has b_count > 1 we must not free this + unformatted node since it is in use. */ + reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); + // note: path could be changed, first line in for loop takes care + // of it + + for ( n_retry = 0, n_counter = *p_n_removed; + n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { + + if (item_moved (&s_ih, p_s_path)) { + need_research = 1 ; + break; + } +#ifdef CONFIG_REISERFS_CHECK + if (p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) || + p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1) + reiserfs_panic (p_s_sb, "vs-5265: prepare_for_delete_or_cut: pointer out of range"); +#endif + + if ( ! *p_n_unfm_pointer ) { /* Hole, nothing to remove. */ + if ( ! n_retry ) + (*p_n_removed)++; + continue; + } + /* Search for the buffer in cache. */ + p_s_un_bh = get_hash_table(p_s_sb->s_dev, *p_n_unfm_pointer, n_blk_size); + + if (p_s_un_bh && buffer_locked(p_s_un_bh)) { + __wait_on_buffer(p_s_un_bh) ; + if ( item_moved (&s_ih, p_s_path) ) { + need_research = 1; + brelse(p_s_un_bh) ; + break ; + } + } + if ( p_s_un_bh && block_in_use (p_s_un_bh)) { + /* Block is locked or held more than by one holder and by + journal. 
*/ + +#ifndef __KERNEL__ + reiserfs_panic(p_s_sb, "PAP-5270: prepare_for_delete_or_cut: b_count != 1"); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if (n_repeat_counter && (n_repeat_counter % 100000) == 0) { + printk("prepare_for_delete, waiting on buffer %lu, b_count %d, %s%cJDIRTY %cJDIRTY_WAIT\n", + p_s_un_bh->b_blocknr, atomic_read (&p_s_un_bh->b_count), + buffer_locked (p_s_un_bh) ? "locked, " : "", + buffer_journaled(p_s_un_bh) ? ' ' : '!', + buffer_journal_dirty(p_s_un_bh) ? ' ' : '!') ; + + } +#endif + n_retry = 1; + brelse (p_s_un_bh); + continue; + } + + if ( ! n_retry ) + (*p_n_removed)++; + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_un_bh && (*p_n_unfm_pointer != p_s_un_bh->b_blocknr )) + // note: minix_truncate allows that. As truncate is + // protected by down (inode->i_sem), two truncates can not + // co-exist + reiserfs_panic(p_s_sb, "PAP-5280: prepare_for_delete_or_cut: blocks numbers are different"); +#endif + + tmp = *p_n_unfm_pointer; + *p_n_unfm_pointer = 0; + journal_mark_dirty (th, p_s_sb, p_s_bh); + bforget (p_s_un_bh); + inode->i_blocks -= p_s_sb->s_blocksize / 512; + reiserfs_free_block(th, tmp); + if ( item_moved (&s_ih, p_s_path) ) { + need_research = 1; + break ; +#if 0 + reiserfs_prepare_for_journal(p_s_sb, + PATH_PLAST_BUFFER(p_s_path), + 1) ; + if ( comp_items(&s_ih, p_s_path) ) { + reiserfs_restore_prepared_buffer(p_s_sb, + PATH_PLAST_BUFFER(p_s_path)) ; + brelse(p_s_un_bh); + break; + } + *p_n_unfm_pointer = 0; + journal_mark_dirty (th,p_s_sb,PATH_PLAST_BUFFER(p_s_path)); + + reiserfs_free_block(th, p_s_sb, block_addr); + if (p_s_un_bh) { + mark_buffer_clean (p_s_un_bh); + brelse (p_s_un_bh); + } + if ( comp_items(&s_ih, p_s_path) ) { + break ; + } +#endif + } + + } + + /* a trick. If the buffer has been logged, this + ** will do nothing. If we've broken the loop without + ** logging it, it will restore the buffer + ** + */ + reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); + + if ( n_retry ) { + /* There is block in use. 
Wait, they should release it soon */ + +#ifdef CONFIG_REISERFS_CHECK + if ( *p_n_removed >= n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5290: prepare_for_delete_or_cut: illegal case"); + if ( !(++n_repeat_counter % 500000) ) { + reiserfs_warning("PAP-5300: prepare_for_delete_or_cut: (pid %u): " + "could not delete item %k in (%d) iterations. New file length %Lu. (inode %Ld), Still trying\n", + current->pid, p_s_item_key, n_repeat_counter, n_new_file_length, inode->i_size); + if (n_repeat_counter == 5000000) { + print_block (PATH_PLAST_BUFFER(p_s_path), 3, + PATH_LAST_POSITION (p_s_path) - 2, PATH_LAST_POSITION (p_s_path) + 2); + reiserfs_panic(p_s_sb, "PAP-5305: prepare_for_delete_or_cut: key %k, new_file_length %Ld", + p_s_item_key, n_new_file_length); + } + } +#endif + +#ifdef __KERNEL__ + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + schedule(); +#endif + } + /* This loop can be optimized. */ + } while ( (*p_n_removed < n_unfm_number || need_research) && + search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND ); + +#ifdef CONFIG_REISERFS_CHECK + if ( *p_n_removed < n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5310: prepare_for_delete_or_cut: indirect item is not found"); + + if (item_moved (&s_ih, p_s_path) ) { + printk("prepare_for_delete_or_cut: after while, comp failed, retry\n") ; + BUG (); + } +#endif + + if (c_mode == M_CUT) + pos_in_item (p_s_path) *= UNFM_P_SIZE; + return c_mode; + } +} + + +/* Calculate the number of bytes which will be deleted or cut in the balance. */ +int calc_deleted_bytes_number( + struct tree_balance * p_s_tb, + char c_mode + ) { + int n_del_size; + struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); + + if ( is_statdata_le_ih (p_le_ih) ) + return 0; + + if ( is_direntry_le_ih (p_le_ih) ) { + // return EMPTY_DIR_SIZE; /* We delete empty directories only. */ + // we can't use EMPTY_DIR_SIZE, as old format dirs have a different + // empty size. ick. FIXME, is this right?
+ // + return le16_to_cpu(p_le_ih->ih_item_len) ; + } + n_del_size = ( c_mode == M_DELETE ) ? le16_to_cpu (p_le_ih->ih_item_len) : -p_s_tb->insert_size[0]; + + if ( is_indirect_le_ih (p_le_ih) ) + n_del_size = (n_del_size/UNFM_P_SIZE)* + (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih); + return n_del_size; +} + +static void init_tb_struct( + struct reiserfs_transaction_handle *th, + struct tree_balance * p_s_tb, + struct super_block * p_s_sb, + struct path * p_s_path, + int n_size + ) { + memset (p_s_tb,'\0',sizeof(struct tree_balance)); + p_s_tb->transaction_handle = th ; + p_s_tb->tb_sb = p_s_sb; + p_s_tb->tb_path = p_s_path; + PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; + p_s_tb->insert_size[0] = n_size; +} + + + +void padd_item (char * item, int total_length, int length) +{ + int i; + + for (i = total_length; i > length; ) + item [--i] = 0; +} + + +/* Delete object item. */ +int reiserfs_delete_item (struct reiserfs_transaction_handle *th, + struct path * p_s_path, /* Path to the deleted item. */ + struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ + struct inode * p_s_inode,/* inode is here just to update i_blocks */ + struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. 
*/ +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; + int n_ret_value, + n_del_size, + n_removed; + +#ifdef CONFIG_REISERFS_CHECK + char c_mode; + int n_iter = 0; +#endif + + init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/); + + while ( 1 ) { + n_removed = 0; + +#ifdef CONFIG_REISERFS_CHECK + n_iter++; + c_mode = +#endif + prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode)); + +#ifdef CONFIG_REISERFS_CHECK + if ( c_mode != M_DELETE ) + reiserfs_panic(p_s_sb, "PAP-5320: reiserfs_delete_item: mode must be M_DELETE"); +#endif + + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + s_del_balance.insert_size[0] = n_del_size; + + n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, 0); + if ( n_ret_value != REPEAT_SEARCH ) + break; + + // file system changed, repeat search + n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { + reiserfs_warning ("vs-5340: reiserfs_delete_item: " + "no items of the file %K found\n", p_s_item_key); + break; + } + } /* while (1) */ + + if ( n_ret_value != CARRY_ON ) { + unfix_nodes(&s_del_balance); + return 0; + } + + // reiserfs_delete_item returns item length when success + n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); + + if ( p_s_un_bh ) { + int off; + + /* We are in direct2indirect conversion, so move tail contents + to the unformatted node */ + /* note, we do the copy before preparing the buffer because we + ** don't care about the contents of the unformatted node yet. + ** the only thing we really care about is the direct item's data + ** is in the unformatted node. 
+ ** + ** Otherwise, we would have to call reiserfs_prepare_for_journal on + ** the unformatted node, which might schedule, meaning we'd have to + ** loop all the way back up to the start of the while loop. + ** + ** The unformatted node is prepared and logged after the do_balance. + ** -clm + */ + + off = ((le_ih_k_offset (&s_ih) - 1) & (p_s_sb->s_blocksize - 1)); + memcpy(p_s_un_bh->b_data + off, + B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value); + + /* clear out the rest of the block past the end of the file. */ + if (off + n_ret_value < p_s_un_bh->b_size) { + memset(p_s_un_bh->b_data + off + n_ret_value, 0, + p_s_un_bh->b_size - off - n_ret_value) ; + } + } + + /* Perform balancing after all resources have been collected at once. */ + do_balance(&s_del_balance, NULL, NULL, M_DELETE); + + /* see comment above for why this is after the do_balance */ + if (p_s_un_bh) { + mark_buffer_dirty(p_s_un_bh) ; +#if 0 + if (reiserfs_dont_log(p_s_sb)) { + mark_buffer_dirty(p_s_un_bh, 0) ; + } else { + if (p_s_un_bh->b_end_io == reiserfs_end_buffer_io_sync) { + reiserfs_prepare_for_journal(p_s_sb, p_s_un_bh, 1) ; + journal_mark_dirty(th, p_s_sb, p_s_un_bh) ; + } else { + mark_buffer_dirty(p_s_un_bh, 0) ; + } + } +#endif + } + + /* Return deleted body length */ + return n_ret_value; +} + + +/* Summary Of Mechanisms For Handling Collisions Between Processes: + + deletion of the body of the object is performed by iput(), with the + result that if multiple processes are operating on a file, the + deletion of the body of the file is deferred until the last process + that has an open inode performs its iput(). + + writes and truncates are protected from collisions by use of + semaphores. + + creates, linking, and mknod are protected from collisions with other + processes by making the reiserfs_add_entry() the last step in the + creation, and then rolling back all changes if there was a collision. 
+ - Hans +*/ + + +/* this deletes item which never gets split */ +static void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, + struct key * key) +{ + struct tree_balance tb; + INITIALIZE_PATH (path); + int item_len; + int tb_init = 0 ; + struct cpu_key cpu_key; + int retval; + + le_key2cpu_key (&cpu_key, key); + + while (1) { + retval = search_item (th->t_super, &cpu_key, &path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + "i/o failure occured trying to delete %K\n", &cpu_key); + break; + } + if (retval != ITEM_FOUND) { + pathrelse (&path); + reiserfs_warning ("vs-: reiserfs_delete_solid_item: %k not found", + key); + break; + } + if (!tb_init) { + tb_init = 1 ; + item_len = le16_to_cpu (PATH_PITEM_HEAD (&path)->ih_item_len); + init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); + } + + retval = fix_nodes (M_DELETE, &tb, NULL, 0); + if (retval == REPEAT_SEARCH) + continue; + + if (retval == CARRY_ON) { + do_balance (&tb, 0, 0, M_DELETE); + break; + } + + // IO_ERROR, NO_DISK_SPACE, etc + reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + "could not delete %K due to fix_nodes failure\n", &cpu_key); + unfix_nodes (&tb); + break; + } + + reiserfs_check_path(&path) ; +} + + +void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode) +{ + inode->i_size = 0; + + /* for directory this deletes item containing "." and ".." */ + reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/); + + /* delete stat data */ + /* this debug code needs to go away. 
Trying to find a truncate race + ** -- clm -- 4/1/2000 + */ +#if 0 + if (inode->i_nlink != 0) { + reiserfs_warning("clm-4001: deleting inode with link count==%d\n", inode->i_nlink) ; + } +#endif + reiserfs_delete_solid_item (th, INODE_PKEY (inode)); +} + + +static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + struct page *page, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + loff_t n_new_file_size, + char * p_c_mode + ) { + struct super_block * p_s_sb = p_s_inode->i_sb; + int n_block_size = p_s_sb->s_blocksize; + int cut_bytes; + + if (n_new_file_size != p_s_inode->i_size) + BUG (); + + /* the page being sent in could be NULL if there was an i/o error + ** reading in the last block. The user will hit problems trying to + ** read the file, but for now we just skip the indirect2direct + */ + if (atomic_read(&p_s_inode->i_count) > 1 || + !tail_has_to_be_packed (p_s_inode) || + !page || p_s_inode->u.reiserfs_i.nopack) { + // leave tail in an unformatted node + *p_c_mode = M_SKIP_BALANCING; + cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1)); + pathrelse(p_s_path); + return cut_bytes; + } + /* Perform the conversion to a direct_item. */ + /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/ + return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); +} + + +/* we did indirect_to_direct conversion. And we have inserted direct + item successfully, but there was no disk space to cut unfm + pointer being converted. Therefore we have to delete inserted + direct item(s) */ +static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path) +{ + struct cpu_key tail_key; + int tail_len; + int removed; + + make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!!
+ tail_key.key_length = 4; + + tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; + while (tail_len) { + /* look for the last byte of the tail */ + if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) + reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item"); +#ifdef CONFIG_REISERFS_CHECK + if (path->pos_in_item != PATH_PITEM_HEAD (path)->ih_item_len - 1) + reiserfs_panic (inode->i_sb, "vs-5616: indirect_to_direct_roll_back: appended bytes found"); +#endif + PATH_LAST_POSITION (path) --; + + removed = reiserfs_delete_item (th, path, &tail_key, inode, 0/*unbh not needed*/); +#ifdef CONFIG_REISERFS_CHECK + if (removed <= 0 || removed > tail_len) + reiserfs_panic (inode->i_sb, "vs-5617: indirect_to_direct_roll_back: " + "there was tail %d bytes, removed item length %d bytes", + tail_len, removed); +#endif + tail_len -= removed; + set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed); + } + printk ("indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space\n"); + //mark_file_without_tail (inode); + mark_inode_dirty (inode); +} + + +/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ +int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + struct inode * p_s_inode, + struct page *page, + loff_t n_new_file_size) +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + /* Every function which is going to call do_balance must first + create a tree_balance structure. Then it must fill up this + structure by using the init_tb_struct and fix_nodes functions. + After that we can make tree balancing. */ + struct tree_balance s_cut_balance; + int n_cut_size = 0, /* Amount to be cut. */ + n_ret_value = CARRY_ON, + n_removed = 0, /* Number of the removed unformatted nodes. 
*/ + n_is_inode_locked = 0; + char c_mode; /* Mode of the balance. */ + int retval2 = -1; + + + init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); + + + /* Repeat this loop until we either cut the item without needing + to balance, or we fix_nodes without schedule occuring */ + while ( 1 ) { + /* Determine the balance mode, position of the first byte to + be cut, and size to be cut. In case of the indirect item + free unformatted nodes which are pointed to by the cut + pointers. */ + + c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, + &n_cut_size, n_new_file_size); + if ( c_mode == M_CONVERT ) { + /* convert last unformatted node to direct item or leave + tail in the unformatted node */ +#ifdef CONFIG_REISERFS_CHECK + if ( n_ret_value != CARRY_ON ) + reiserfs_panic (p_s_sb, "PAP-5570: reiserfs_cut_from_item: can not convert twice"); +#endif + + n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key, + n_new_file_size, &c_mode); + if ( c_mode == M_SKIP_BALANCING ) + /* tail has been left in the unformatted node */ + return n_ret_value; + + n_is_inode_locked = 1; + + /* removing of last unformatted node will change value we + have to return to truncate. Save it */ + retval2 = n_ret_value; + /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/ + + /* So, we have performed the first part of the conversion: + inserting the new direct item. Now we are removing the + last unformatted node pointer. Set key to search for + it. 
*/ + set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); + p_s_item_key->key_length = 4; + n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); + set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); + if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ + print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); + reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%k)", p_s_item_key); + } + continue; + } + if (n_cut_size == 0) { + pathrelse (p_s_path); + return 0; + } + + s_cut_balance.insert_size[0] = n_cut_size; + + n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, 0); + if ( n_ret_value != REPEAT_SEARCH ) + break; + + n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == POSITION_FOUND) + continue; + + reiserfs_warning ("PAP-5610: reiserfs_cut_from_item: item %K not found\n", p_s_item_key); + pathrelse (p_s_path); + return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; + } /* while */ + + // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) + if ( n_ret_value != CARRY_ON ) { + if ( n_is_inode_locked ) { + // FIXME: this seems to be not needed: we are always able + // to cut item + indirect_to_direct_roll_back (th, p_s_inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) + reiserfs_warning (""); + unfix_nodes (&s_cut_balance); + return -EIO; + } + + /* go ahead and perform balancing */ + +#ifdef CONFIG_REISERFS_CHECK + if ( c_mode == M_PASTE || c_mode == M_INSERT ) + reiserfs_panic (p_s_sb, "PAP-5640: reiserfs_cut_from_item: illegal mode"); +#endif + + /* Calculate number of bytes that need to be cut from the item. 
*/ + if (retval2 == -1) + n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); + else + n_ret_value = retval2; + + if ( c_mode == M_DELETE ) { + struct item_head * p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); + + if ( is_direct_le_ih (p_le_ih) && (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { + /* we delete first part of tail which was stored in direct + item(s) */ + // FIXME: this is to keep 3.5 happy + p_s_inode->u.reiserfs_i.i_first_direct_byte = U32_MAX; + p_s_inode->i_blocks -= p_s_sb->s_blocksize / 512; + } + } + +#ifdef CONFIG_REISERFS_CHECK + if (n_is_inode_locked) { + struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); + /* we are going to complete indirect2direct conversion. Make + sure, that we exactly remove last unformatted node pointer + of the item */ + if (!is_indirect_le_ih (le_ih)) + reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: " + "item must be indirect %h", le_ih); + + if (c_mode == M_DELETE && le16_to_cpu (le_ih->ih_item_len) != UNFM_P_SIZE) + reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: " + "completing indirect2direct conversion indirect item %h" + "being deleted must be of 4 byte long", le_ih); + + if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { + reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: " + "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", + le_ih, s_cut_balance.insert_size[0]); + } + /* it would be useful to make sure, that right neighboring + item is direct item of this file */ + } +#endif + + do_balance(&s_cut_balance, NULL, NULL, c_mode); + if ( n_is_inode_locked ) { + /* we've converted from indirect to direct, we must remove + ** ourselves from the list of pages that need flushing before + ** this transaction can commit + */ + reiserfs_remove_page_from_flush_list(th, p_s_inode) ; + p_s_inode->u.reiserfs_i.i_pack_on_close = 0 ; + } + return n_ret_value; +} + + +static void truncate_directory 
(struct reiserfs_transaction_handle *th, struct inode * inode) +{ + if (inode->i_nlink) + reiserfs_warning ("vs-5655: truncate_directory: link count != 0"); + + set_le_key_k_offset (ITEM_VERSION_1, INODE_PKEY (inode), DOT_OFFSET); + set_le_key_k_type (ITEM_VERSION_1, INODE_PKEY (inode), TYPE_DIRENTRY); + reiserfs_delete_solid_item (th, INODE_PKEY (inode)); + + set_le_key_k_offset (ITEM_VERSION_1, INODE_PKEY (inode), SD_OFFSET); + set_le_key_k_type (ITEM_VERSION_1, INODE_PKEY (inode), TYPE_STAT_DATA); +} + + + + +/* Truncate file to the new size. Note, this must be called with a transaction + already started */ +void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, /* ->i_size contains new + size */ + struct page *page, /* up to date for last block */ + int update_timestamps /* when it is called by + file_release to convert + the tail - no timestamps + should be updated */ + ) { + INITIALIZE_PATH (s_search_path); /* Path to the current object item. */ + struct item_head * p_le_ih; /* Pointer to an item header. */ + struct cpu_key s_item_key; /* Key to search for a previous file item. */ + loff_t n_file_size, /* Old file size. */ + n_new_file_size;/* New file size. */ + int n_deleted; /* Number of deleted or truncated bytes. */ + int retval; + + if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) ) + return; + + if (S_ISDIR(p_s_inode->i_mode)) { + // deletion of directory - no need to update timestamps + truncate_directory (th, p_s_inode); + return; + } + + /* Get new file size. 
*/ + n_new_file_size = p_s_inode->i_size; + + // FIXME: note, that key type is unimportant here + make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3); + + retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-5657: reiserfs_do_truncate: " + "i/o failure occured trying to truncate %K\n", &s_item_key); + return; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { + reiserfs_warning ("PAP-5660: reiserfs_do_truncate: " + "wrong result %d of search for %K\n", &s_item_key); + return; + } + + s_search_path.pos_in_item --; + + /* Get real file size (total length of all file items) */ + p_le_ih = PATH_PITEM_HEAD(&s_search_path); + if ( is_statdata_le_ih (p_le_ih) ) + n_file_size = 0; + else { + loff_t offset = le_ih_k_offset (p_le_ih); + int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize); + + /* this may mismatch with real file size: if last direct item + had no padding zeros and last unformatted node had no free + space, this file would have this file size */ + n_file_size = offset + bytes - 1; + } + + if ( n_file_size == 0 || n_file_size < n_new_file_size ) { + pathrelse(&s_search_path); + return; + } + /* Update key to search for the last file item. */ + set_cpu_key_k_offset (&s_item_key, n_file_size); + + do { + /* Cut or delete file item. */ + n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size); + if (n_deleted < 0) { + reiserfs_warning ("vs-5665: reiserfs_truncate_file: cut_from_item failed"); + reiserfs_check_path(&s_search_path) ; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( n_deleted > n_file_size ){ + reiserfs_panic (p_s_inode->i_sb, "PAP-5670: reiserfs_truncate_file: " + "reiserfs_truncate_file returns too big number: deleted %d, file_size %lu, item_key %k", + n_deleted, n_file_size, &s_item_key); + } +#endif + + /* Change key to search the last file item. 
*/ + n_file_size -= n_deleted; + + set_cpu_key_k_offset (&s_item_key, n_file_size); + + /* While there are bytes to truncate and previous file item is presented in the tree. */ + + /* + ** This loop could take a really long time, and could log + ** many more blocks than a transaction can hold. So, we do a polite + ** journal end here, and if the transaction needs ending, we make + ** sure the file is consistent before ending the current trans + ** and starting a new one + */ + if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + int orig_len_alloc = th->t_blocks_allocated ; + decrement_counters_in_path(&s_search_path) ; + + if (update_timestamps) { + p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; + // FIXME: sd gets wrong size here + } + reiserfs_update_sd(th, p_s_inode) ; + + journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; + journal_begin(th, p_s_inode->i_sb, orig_len_alloc) ; + } + } while ( n_file_size > ROUND_UP (n_new_file_size) && + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_file_size > ROUND_UP (n_new_file_size) ) + reiserfs_panic (p_s_inode->i_sb, "PAP-5680: reiserfs_truncate_file: " + "truncate did not finish: new_file_size %Ld, current %Ld, oid %d\n", + n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); +#endif + + if (update_timestamps) { + // this is truncate, not file closing + p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; + } + reiserfs_update_sd (th, p_s_inode); + + pathrelse(&s_search_path) ; +} + + +// this makes sure, that we __append__, not overwrite or add holes +static void check_research_for_paste (struct path * path, struct cpu_key * p_s_key) +{ + struct item_head * found_ih = get_ih (path); + + if (is_direct_le_ih (found_ih)) { + if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_bh (path)->b_size) != + cpu_key_k_offset (p_s_key) || + op_bytes_number (found_ih, get_bh (path)->b_size) != 
pos_in_item (path)) + reiserfs_panic (0, "PAP-5720: check_research_for_paste: " + "found direct item %h or position (%d) does not match to key %K", + found_ih, pos_in_item (path), p_s_key); + } + if (is_indirect_le_ih (found_ih)) { + if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || + I_UNFM_NUM (found_ih) != pos_in_item (path) || + get_ih_free_space (found_ih) != 0) + reiserfs_panic (0, "PAP-5730: check_research_for_paste: " + "found indirect item (%h) or position (%d) does not match to key (%K)", + found_ih, pos_in_item (path), p_s_key); + } +} + + +/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ +int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, + struct path * p_s_search_path, /* Path to the pasted item. */ + struct cpu_key * p_s_key, /* Key to search for the needed item.*/ + const char * p_c_body, /* Pointer to the bytes to paste. */ + int n_pasted_size) /* Size of pasted bytes. */ +{ + struct tree_balance s_paste_balance; + int retval; + + init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); + + while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) { + /* file system changed while we were in the fix_nodes */ + retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path); + if (retval == IO_ERROR) + return -EIO; + if (retval == POSITION_FOUND) { + reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); + pathrelse (p_s_search_path); + return -EEXIST; + } + +#ifdef CONFIG_REISERFS_CHECK + check_research_for_paste (p_s_search_path, p_s_key); +#endif + } + + /* Perform balancing after all resources are collected by fix_nodes, and + accessing them will not risk triggering schedule. 
*/ + if ( retval == CARRY_ON ) { + do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE); + return 0; + } + + unfix_nodes(&s_paste_balance); + return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; +} + + +/* Insert new item into the buffer at the path. */ +int reiserfs_insert_item(struct reiserfs_transaction_handle *th, + struct path * p_s_path, /* Path to the inserted item. */ + struct cpu_key * key, + struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ + const char * p_c_body) /* Pointer to the bytes to insert. */ +{ + struct tree_balance s_ins_balance; + int retval; + + init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + p_s_ih->ih_item_len); + + /* + if (p_c_body == 0) + n_zeros_num = p_s_ih->ih_item_len; + */ + // le_key2cpu_key (&key, &(p_s_ih->ih_key)); + + while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) { + /* file system changed while we were in the fix_nodes */ + retval = search_item (th->t_super, key, p_s_path); + if (retval == IO_ERROR) + return -EIO; + + if (retval == ITEM_FOUND) { + reiserfs_warning ("PAP-5760: reiserfs_insert_item: " + "key %K already exists in the tree\n", key); + pathrelse (p_s_path); + return -EEXIST; + } + } + + /* make balancing after all resources will be collected at a time */ + if ( retval == CARRY_ON ) { + do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); + return 0; + } + + unfix_nodes(&s_ins_balance); + return (retval == NO_DISK_SPACE) ?
-ENOSPC : -EIO; +} + + + + diff -u -r --new-file linux/fs/reiserfs/super.c v2.4.0-test8/linux/fs/reiserfs/super.c --- linux/fs/reiserfs/super.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/super.c Thu Aug 10 19:09:04 2000 @@ -0,0 +1,836 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> +#include <linux/locks.h> +#include <linux/init.h> + +#else + +#include "nokernel.h" +#include <stdlib.h> // for simple_strtoul + +#endif + +#define SUPPORT_OLD_FORMAT + +#define REISERFS_OLD_BLOCKSIZE 4096 +#define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20 + + +#if 0 +// this one is not used currently +inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag) +{ + mark_buffer_dirty (bh, flag); +} +#endif + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +void reiserfs_write_super (struct super_block * s) +{ + + int dirty = 0 ; + lock_kernel() ; + if (!(s->s_flags & MS_RDONLY)) { + unlock_super(s) ; + dirty = flush_old_commits(s, 1) ; + lock_super(s) ; + } + s->s_dirt = dirty; + unlock_kernel() ; +} + + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +/* there should be no suspected recipients already. True and cautious + bitmaps should not differ. 
We only have to free preserve list and + write both bitmaps */ +void reiserfs_put_super (struct super_block * s) +{ + int i; + struct reiserfs_transaction_handle th ; + + /* the end_io task has to call get_super, which locks the super, which + ** will deadlock with the journal. So, we unlock, and then relock + ** when the journal is done. + ** + ** this sucks. + */ + unlock_super(s) ; + journal_begin(&th, s, 10) ; + + /* change file system state to current state if it was mounted with read-write permissions */ + if (!(s->s_flags & MS_RDONLY)) { + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + s->u.reiserfs_sb.s_rs->s_state = le16_to_cpu (s->u.reiserfs_sb.s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + } + + journal_release(&th, s) ; + lock_super(s) ; + + for (i = 0; i < SB_BMAP_NR (s); i ++) + brelse (SB_AP_BITMAP (s)[i]); + + reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + + brelse (SB_BUFFER_WITH_SB (s)); + + print_statistics (s); + + if (s->u.reiserfs_sb.s_kmallocs != 0) { + reiserfs_warning ("vs-2004: reiserfs_put_super: aloocated memory left %d\n", + s->u.reiserfs_sb.s_kmallocs); + } + + return; +} + +struct super_operations reiserfs_sops = +{ + read_inode: reiserfs_read_inode, + read_inode2: reiserfs_read_inode2, + write_inode: reiserfs_write_inode, + dirty_inode: reiserfs_dirty_inode, + delete_inode: reiserfs_delete_inode, + put_super: reiserfs_put_super, + write_super: reiserfs_write_super, + statfs: reiserfs_statfs, + remount_fs: reiserfs_remount, +}; + +/* this was (ext2)parse_options */ +static int parse_options (char * options, unsigned long * mount_options, unsigned long * blocks) +{ + char * this_char; + char * value; + + *blocks = 0; + if (!options) + /* use default configuration: complex read, create tails, preserve on */ + return 1; + for (this_char = strtok (options, ","); this_char != NULL; this_char = strtok (NULL, ",")) { + if ((value = strchr (this_char, '=')) != 
NULL) + *value++ = 0; + if (!strcmp (this_char, "notail")) { + set_bit (NOTAIL, mount_options); + } else if (!strcmp (this_char, "conv")) { + // if this is set, we update super block such that + // the partition will not be mounable by 3.5.x anymore + set_bit (REISERFS_CONVERT, mount_options); + } else if (!strcmp (this_char, "nolog")) { + reiserfs_warning("reiserfs: nolog mount option not supported yet\n"); + } else if (!strcmp (this_char, "replayonly")) { + set_bit (REPLAYONLY, mount_options); + } else if (!strcmp (this_char, "resize")) { + if (!value || !*value){ + printk("reiserfs: resize option requires a value\n"); + } + *blocks = simple_strtoul (value, &value, 0); + } else if (!strcmp (this_char, "hash")) { + if (value && *value) { + /* if they specify any hash option, we force detection + ** to make sure they aren't using the wrong hash + */ + if (!strcmp(value, "rupasov")) { + set_bit (FORCE_RUPASOV_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "tea")) { + set_bit (FORCE_TEA_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "r5")) { + set_bit (FORCE_R5_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "detect")) { + set_bit (FORCE_HASH_DETECT, mount_options); + } else { + printk("reiserfs: invalid hash function specified\n") ; + return 0 ; + } + } else { + printk("reiserfs: hash option requires a value\n"); + return 0 ; + } + } else { + printk ("reiserfs: Unrecognized mount option %s\n", this_char); + return 0; + } + } + return 1; +} + + +int reiserfs_is_super(struct super_block *s) { + return (s->s_dev != 0 && s->s_op == &reiserfs_sops) ; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. 
It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_remount (struct super_block * s, int * flags, char * data) +{ + struct reiserfs_super_block * rs; + struct reiserfs_transaction_handle th ; + unsigned long blocks; + unsigned long mount_options; + + rs = SB_DISK_SUPER_BLOCK (s); + + if (!parse_options(data, &mount_options, &blocks)) + return 0; + + if(blocks) + reiserfs_resize(s, blocks); + + if ((unsigned long)(*flags & MS_RDONLY) == (s->s_flags & MS_RDONLY)) { + /* there is nothing to do to remount read-only fs as read-only fs */ + return 0; + } + + if (*flags & MS_RDONLY) { + /* try to remount file system with read-only permissions */ + if (le16_to_cpu (rs->s_state) == REISERFS_VALID_FS || s->u.reiserfs_sb.s_mount_state != REISERFS_VALID_FS) { + return 0; + } + + unlock_super(s) ; + journal_begin(&th, s, 10) ; + lock_super(s) ; + /* Mounting a rw partition read-only. */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + rs->s_state = cpu_to_le16 (s->u.reiserfs_sb.s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 0; + } else { + unlock_super(s) ; + journal_begin(&th, s, 10) ; + lock_super(s) ; + + /* Mount a partition which is read-only, read-write */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + s->u.reiserfs_sb.s_mount_state = le16_to_cpu (rs->s_state); + s->s_flags &= ~MS_RDONLY; + rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS); + /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 0; + s->u.reiserfs_sb.s_mount_state = REISERFS_VALID_FS ; + } + /* this will force a full flush of all journal lists */ + SB_JOURNAL(s)->j_must_wait = 1 ; + unlock_super(s) ; + journal_end(&th, s, 10) ; + lock_super(s) ; + return 0; +} + + +static int read_bitmaps (struct super_block * s) +{ + int i, bmp, dl ; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + + SB_AP_BITMAP (s) = 
reiserfs_kmalloc (sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr), GFP_KERNEL, s); + if (SB_AP_BITMAP (s) == 0) + return 1; + memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr)); + + /* reiserfs leaves the first 64k unused so that any partition + labeling scheme currently used will have enough space. Then we + need one block for the super. -Hans */ + bmp = (REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + SB_AP_BITMAP (s)[0] = reiserfs_bread (s->s_dev, bmp, s->s_blocksize); + if(!SB_AP_BITMAP(s)[0]) + return 1; + for (i = 1, bmp = dl = rs->s_blocksize * 8; i < le16_to_cpu (rs->s_bmap_nr); i ++) { + SB_AP_BITMAP (s)[i] = reiserfs_bread (s->s_dev, bmp, s->s_blocksize); + if (!SB_AP_BITMAP (s)[i]) + return 1; + bmp += dl; + } + + return 0; +} + +static int read_old_bitmaps (struct super_block * s) +{ + int i ; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + + /* read true bitmap */ + SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr), GFP_KERNEL, s); + if (SB_AP_BITMAP (s) == 0) + return 1; + + memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr)); + + for (i = 0; i < le16_to_cpu (rs->s_bmap_nr); i ++) { + SB_AP_BITMAP (s)[i] = reiserfs_bread (s->s_dev, bmp1 + i, s->s_blocksize); + if (!SB_AP_BITMAP (s)[i]) + return 1; + } + + return 0; +} + +void check_bitmap (struct super_block * s) +{ + int i = 0; + int free = 0; + char * buf; + + while (i < SB_BLOCK_COUNT (s)) { + buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)]->b_data; + if (!reiserfs_test_le_bit (i % (s->s_blocksize * 8), buf)) + free ++; + i ++; + } + + if (free != SB_FREE_BLOCKS (s)) + reiserfs_warning ("vs-4000: check_bitmap: %d free blocks, must be %d\n", + free, SB_FREE_BLOCKS (s)); +} + +#ifdef SUPPORT_OLD_FORMAT + +/* support 
old disk layout */ +static int read_old_super_block (struct super_block * s, int size) +{ + struct buffer_head * bh; + struct reiserfs_super_block * rs; + + printk("read_old_super_block: try to find super block in old location\n"); + /* there are only 4k-sized blocks in v3.5.10 */ + if (size != REISERFS_OLD_BLOCKSIZE) + set_blocksize(s->s_dev, REISERFS_OLD_BLOCKSIZE); + bh = bread (s->s_dev, + REISERFS_OLD_DISK_OFFSET_IN_BYTES / REISERFS_OLD_BLOCKSIZE, + REISERFS_OLD_BLOCKSIZE); + if (!bh) { + printk("read_old_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, strlen ( REISERFS_SUPER_MAGIC_STRING))) { + /* pre-journaling version check */ + if(!strncmp((char*)rs + REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ, + REISERFS_SUPER_MAGIC_STRING, strlen(REISERFS_SUPER_MAGIC_STRING))) { + printk("read_old_super_blockr: a pre-journaling reiserfs filesystem isn't suitable there.\n"); + brelse(bh); + return 1; + } + + brelse (bh); + printk ("read_old_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev)); + return 1; + } + + if(REISERFS_OLD_BLOCKSIZE != le16_to_cpu (rs->s_blocksize)) { + printk("read_old_super_block: blocksize mismatch, super block corrupted\n"); + brelse(bh); + return 1; + } + + s->s_blocksize = REISERFS_OLD_BLOCKSIZE; + s->s_blocksize_bits = 0; + while ((1 << s->s_blocksize_bits) != s->s_blocksize) + s->s_blocksize_bits ++; + + SB_BUFFER_WITH_SB (s) = bh; + SB_DISK_SUPER_BLOCK (s) = rs; + s->s_op = &reiserfs_sops; + return 0; +} +#endif + +// +// FIXME: mounting old filesystems we _must_ change magic string to +// make then unmountable by reiserfs of 3.5.x +// +static int read_super_block (struct super_block * s, int size) +{ + struct buffer_head * bh; + struct reiserfs_super_block * rs; + + bh = bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / size, size); + if (!bh) { + 
printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (!is_reiserfs_magic_string (rs)) { + printk ("read_super_block: can't find a reiserfs filesystem on dev %s\n", + kdevname(s->s_dev)); + brelse (bh); + return 1; + } + + // + // ok, reiserfs signature (old or new) found in 64-th 1k block of + // the device + // + +#ifndef SUPPORT_OLD_FORMAT + // with SUPPORT_OLD_FORMAT undefined - detect old format by + // checking super block version + if (le16_to_cpu (rs->s_version) != REISERFS_VERSION_2) { + brelse (bh); + printk ("read_super_block: unsupported version (%d) of reiserfs found on dev %s\n", + le16_to_cpu (rs->s_version), kdevname(s->s_dev)); + return 1; + } +#endif + + s->s_blocksize = le16_to_cpu (rs->s_blocksize); + s->s_blocksize_bits = 0; + while ((1 << s->s_blocksize_bits) != s->s_blocksize) + s->s_blocksize_bits ++; + + brelse (bh); + + if (s->s_blocksize != size) + set_blocksize (s->s_dev, s->s_blocksize); + bh = reiserfs_bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize, s->s_blocksize); + if (!bh) { + printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (!is_reiserfs_magic_string (rs) || + le16_to_cpu (rs->s_blocksize) != s->s_blocksize) { + brelse (bh); + printk ("read_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev)); + return 1; + } + /* must check to be sure we haven't pulled an old format super out + ** of the old format's log. This is a kludge of a check, but it + ** will work. If block we've just read in is inside the + ** journal for that super, it can't be valid. 
+ */ + if (bh->b_blocknr >= le32_to_cpu(rs->s_journal_block) && + bh->b_blocknr < (le32_to_cpu(rs->s_journal_block) + JOURNAL_BLOCK_COUNT)) { + brelse(bh) ; + printk("super-459: read_super_block: super found at block %lu is within its own log. " + "It must not be of this format type.\n", bh->b_blocknr) ; + return 1 ; + } + SB_BUFFER_WITH_SB (s) = bh; + SB_DISK_SUPER_BLOCK (s) = rs; + s->s_op = &reiserfs_sops; + return 0; +} + +/* after journal replay, reread all bitmap and super blocks */ +static int reread_meta_blocks(struct super_block *s) { + int i ; + ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))) ; + wait_on_buffer(SB_BUFFER_WITH_SB(s)) ; + if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { + printk("reread_meta_blocks, error reading the super\n") ; + return 1 ; + } + + for (i = 0; i < SB_BMAP_NR(s) ; i++) { + ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i])) ; + wait_on_buffer(SB_AP_BITMAP(s)[i]) ; + if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { + printk("reread_meta_blocks, error reading bitmap block number %d at %ld\n", i, SB_AP_BITMAP(s)[i]->b_blocknr) ; + return 1 ; + } + } + return 0 ; + +} + + +///////////////////////////////////////////////////// +// hash detection stuff + + +// if root directory is empty - we set default - Yura's - hash and +// warn about it +// FIXME: we look for only one name in a directory. 
If tea and yura +// bith have the same value - we ask user to send report to the +// mailing list +__u32 find_hash_out (struct super_block * s) +{ + int retval; + struct inode * inode; + struct cpu_key key; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + __u32 hash = DEFAULT_HASH; + + inode = s->s_root->d_inode; + + while (1) { + make_cpu_key (&key, inode, ~0, TYPE_DIRENTRY, 3); + retval = search_by_entry_key (s, &key, &path, &de); + if (retval == IO_ERROR) { + pathrelse (&path); + return UNSET_HASH ; + } + if (retval == NAME_NOT_FOUND) + de.de_entry_num --; + set_de_name_and_namelen (&de); + if (le32_to_cpu (de.de_deh[de.de_entry_num].deh_offset) == DOT_DOT_OFFSET) { + /* allow override in this case */ + if (reiserfs_rupasov_hash(s)) { + hash = YURA_HASH ; + } + reiserfs_warning("reiserfs: FS seems to be empty, autodetect " + "is using the default hash\n"); + break; + } + if (GET_HASH_VALUE(yura_hash (de.de_name, de.de_namelen)) == + GET_HASH_VALUE(keyed_hash (de.de_name, de.de_namelen))) { + reiserfs_warning ("reiserfs: Could not detect hash function " + "please mount with -o hash={tea,rupasov,r5}\n") ; + hash = UNSET_HASH ; + break; + } + if (GET_HASH_VALUE(le32_to_cpu(de.de_deh[de.de_entry_num].deh_offset))== + GET_HASH_VALUE (yura_hash (de.de_name, de.de_namelen))) + hash = YURA_HASH; + else + hash = TEA_HASH; + break; + } + + pathrelse (&path); + return hash; +} + +// finds out which hash names are sorted with +static int what_hash (struct super_block * s) +{ + __u32 code; + + code = le32_to_cpu (s->u.reiserfs_sb.s_rs->s_hash_function_code); + + /* reiserfs_hash_detect() == true if any of the hash mount options + ** were used. 
We must check them to make sure the user isn't + ** using a bad hash value + */ + if (code == UNSET_HASH || reiserfs_hash_detect(s)) + code = find_hash_out (s); + + if (code != UNSET_HASH && reiserfs_hash_detect(s)) { + /* detection has found the hash, and we must check against the + ** mount options + */ + if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { + printk("REISERFS: Error, tea hash detected, " + "unable to force rupasov hash\n") ; + code = UNSET_HASH ; + } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { + printk("REISERFS: Error, rupasov hash detected, " + "unable to force tea hash\n") ; + code = UNSET_HASH ; + } else if (reiserfs_r5_hash(s) && code != R5_HASH) { + printk("REISERFS: Error, r5 hash detected, " + "unable to force r5 hash\n") ; + code = UNSET_HASH ; + } + } else { + /* find_hash_out was not called or could not determine the hash */ + if (reiserfs_rupasov_hash(s)) { + code = YURA_HASH ; + } else if (reiserfs_tea_hash(s)) { + code = TEA_HASH ; + } else if (reiserfs_r5_hash(s)) { + code = R5_HASH ; + } + } + + /* if we are mounted RW, and we have a new valid hash code, update + ** the super + */ + if (code != UNSET_HASH && + !(s->s_flags & MS_RDONLY) && + code != le32_to_cpu (s->u.reiserfs_sb.s_rs->s_hash_function_code)) { + s->u.reiserfs_sb.s_rs->s_hash_function_code = cpu_to_le32(code) ; + } + return code; +} + +// return pointer to appropriate function +static hashf_t hash_function (struct super_block * s) +{ + switch (what_hash (s)) { + case TEA_HASH: + reiserfs_warning ("Using tea hash to sort names\n"); + return keyed_hash; + case YURA_HASH: + reiserfs_warning ("Using rupasov hash to sort names\n"); + return yura_hash; + case R5_HASH: + reiserfs_warning ("Using r5 hash to sort names\n"); + return r5_hash; + } + return NULL; +} + +// this is used to set up correct value for old partitions +int function2code (hashf_t func) +{ + if (func == keyed_hash) + return TEA_HASH; + if (func == yura_hash) + return YURA_HASH; + if (func == 
r5_hash) + return R5_HASH; + + BUG() ; // should never happen + + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent) +{ + int size; + struct inode *root_inode; + kdev_t dev = s->s_dev; + int j; + extern int *blksize_size[]; + struct reiserfs_transaction_handle th ; + int old_format = 0; + unsigned long blocks; + int jinit_done = 0 ; + struct reiserfs_iget4_args args ; + + + memset (&s->u.reiserfs_sb, 0, sizeof (struct reiserfs_sb_info)); + + if (parse_options ((char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) { + return NULL; + } + + if (blocks) { + printk("reserfs: resize option for remount only\n"); + return NULL; + } + + if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)] != 0) { + /* as blocksize is set for partition we use it */ + size = blksize_size[MAJOR(dev)][MINOR(dev)]; + } else { + size = BLOCK_SIZE; + set_blocksize (s->s_dev, BLOCK_SIZE); + } + + /* read block (64-th 1k block), which can contain reiserfs super block */ + if (read_super_block (s, size)) { +#ifdef SUPPORT_OLD_FORMAT + // try old format (undistributed bitmap, super block in 8-th 1k block of a device) + if(read_old_super_block(s,size)) + goto error; + else + old_format = 1; +#endif + goto error ; + } + + s->u.reiserfs_sb.s_mount_state = le16_to_cpu (SB_DISK_SUPER_BLOCK (s)->s_state); /* journal victim */ + s->u.reiserfs_sb.s_mount_state = REISERFS_VALID_FS ; + + if (old_format ? 
read_old_bitmaps(s) : read_bitmaps(s)) { + printk ("reiserfs_read_super: unable to read bitmap\n"); + goto error; + } + + if (journal_init(s)) { + printk("reiserfs_read_super: unable to initialize journal space\n") ; + goto error ; + } else { + jinit_done = 1 ; /* once this is set, journal_release must be called + ** if we error out of the mount + */ + } + if (reread_meta_blocks(s)) { + printk("reiserfs_read_super: unable to reread meta blocks after journal init\n") ; + goto error ; + } + + if (replay_only (s)) + goto error; + + args.objectid = REISERFS_ROOT_PARENT_OBJECTID ; + root_inode = iget4 (s, REISERFS_ROOT_OBJECTID, 0, (void *)(&args)); + if (!root_inode) { + printk ("reiserfs_read_super: get root inode failed\n"); + goto error; + } + + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) { + iput(root_inode); + goto error; + } + + // define and initialize hash function + s->u.reiserfs_sb.s_hash_function = hash_function (s); + if (s->u.reiserfs_sb.s_hash_function == NULL) { + dput(s->s_root) ; + s->s_root = NULL ; + goto error ; + } + if (!(s->s_flags & MS_RDONLY)) { + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + + journal_begin(&th, s, 1) ; + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + + rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS); + + if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))) { + if (le16_to_cpu(rs->s_version) != 0) + BUG (); + // filesystem created under 3.5.x found + if (!old_format_only (s)) { + reiserfs_warning("reiserfs: converting 3.5.x filesystem to the new format\n") ; + // after this 3.5.x will not be able to mount this partition + memcpy (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + sizeof (REISER2FS_SUPER_MAGIC_STRING)); + + reiserfs_convert_objectid_map_v1(s) ; + } else { + reiserfs_warning("reiserfs: using 3.5.x disk format\n") ; + } + } else { + // new format found + set_bit (REISERFS_CONVERT, &(s->u.reiserfs_sb.s_mount_opt)); + } + + // mark hash 
in super block: it could be unset. overwrite should be ok + rs->s_hash_function_code = cpu_to_le32 (function2code (s->u.reiserfs_sb.s_hash_function)); + + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + journal_end(&th, s, 1) ; + s->s_dirt = 0; + } else { + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))) { + reiserfs_warning("reiserfs: using 3.5.x disk format\n") ; + } + } + /* we have to do this to make journal writes work correctly */ + SB_BUFFER_WITH_SB(s)->b_end_io = reiserfs_end_buffer_io_sync ; + + init_waitqueue_head (&(s->u.reiserfs_sb.s_wait)); + + printk("%s\n", reiserfs_get_version_string()) ; + return s; + + error: + if (jinit_done) { /* kill the commit thread, free journal ram */ + journal_release_error(NULL, s) ; + } + if (SB_DISK_SUPER_BLOCK (s)) { + for (j = 0; j < SB_BMAP_NR (s); j ++) { + if (SB_AP_BITMAP (s)) + brelse (SB_AP_BITMAP (s)[j]); + } + if (SB_AP_BITMAP (s)) + reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + } + if (SB_BUFFER_WITH_SB (s)) + brelse(SB_BUFFER_WITH_SB (s)); + + return NULL; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +int reiserfs_statfs (struct super_block * s, struct statfs * buf) +{ + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + + /* changed to accomodate gcc folks.*/ + buf->f_type = REISERFS_SUPER_MAGIC; + buf->f_bsize = le32_to_cpu (s->s_blocksize); + buf->f_blocks = le32_to_cpu (rs->s_block_count) - le16_to_cpu (rs->s_bmap_nr) - 1; + buf->f_bfree = le32_to_cpu (rs->s_free_blocks); + buf->f_bavail = buf->f_bfree; + buf->f_files = -1; + buf->f_ffree = -1; + buf->f_namelen = (REISERFS_MAX_NAME_LEN (s->s_blocksize)); + return 0; +} + +#ifdef __KERNEL__ + +static DECLARE_FSTYPE_DEV(reiserfs_fs_type,"reiserfs",reiserfs_read_super); + +// +// this is exactly what 2.3.99-pre9's init_ext2_fs is +// +static int __init init_reiserfs_fs (void) +{ + return register_filesystem(&reiserfs_fs_type); +} + +EXPORT_NO_SYMBOLS; + +// +// this is exactly what 2.3.99-pre9's init_ext2_fs is +// +static void __exit exit_reiserfs_fs(void) +{ + unregister_filesystem(&reiserfs_fs_type); +} + +module_init(init_reiserfs_fs) ; +module_exit(exit_reiserfs_fs) ; + +#endif + + + diff -u -r --new-file linux/fs/reiserfs/tail_conversion.c v2.4.0-test8/linux/fs/reiserfs/tail_conversion.c --- linux/fs/reiserfs/tail_conversion.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/tail_conversion.c Thu Sep 21 12:25:41 2000 @@ -0,0 +1,281 @@ +/* + * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include <linux/sched.h> +#include <linux/pagemap.h> +#include <linux/reiserfs_fs.h> +#include <linux/locks.h> + +#else + +#include "nokernel.h" + +#endif + + +/* access to tail : when one is going to read tail it must make sure, that is not running. + direct2indirect and indirect2direct can not run concurrently */ + + +/* Converts direct items to an unformatted node. Panics if file has no + tail. 
-ENOSPC if no disk space for conversion */ +/* path points to first direct item of the file regarless of how many of + them are there */ +int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inode, + struct path * path, struct buffer_head * unbh, + loff_t tail_offset) +{ + struct super_block * sb = inode->i_sb; + struct buffer_head *up_to_date_bh ; + struct item_head * p_le_ih = PATH_PITEM_HEAD (path); + struct cpu_key end_key; /* Key to search for the last byte of the + converted item. */ + struct item_head ind_ih; /* new indirect item to be inserted or + key of unfm pointer to be pasted */ + int n_blk_size, + n_retval; /* returned value for reiserfs_insert_item and clones */ + struct unfm_nodeinfo unfm_ptr; /* Handle on an unformatted node + that will be inserted in the + tree. */ + + + sb->u.reiserfs_sb.s_direct2indirect ++; + + n_blk_size = sb->s_blocksize; + + /* and key to search for append or insert pointer to the new + unformatted node. */ + copy_item_head (&ind_ih, p_le_ih); + set_le_ih_k_offset (&ind_ih, tail_offset); + set_le_ih_k_type (&ind_ih, TYPE_INDIRECT); + + /* Set the key to search for the place for new unfm pointer */ + make_cpu_key (&end_key, inode, tail_offset, TYPE_INDIRECT, 4); + + // FIXME: we could avoid this + if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) + reiserfs_panic (sb, "PAP-14030: direct2indirect: " + "pasted or inserted byte exists in the tree"); + + p_le_ih = PATH_PITEM_HEAD (path); + + unfm_ptr.unfm_nodenum = cpu_to_le32 (unbh->b_blocknr); + unfm_ptr.unfm_freespace = 0; // ??? + + if ( is_statdata_le_ih (p_le_ih) ) { + /* Insert new indirect item. */ + set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ + ind_ih.ih_item_len = cpu_to_le16 (UNFM_P_SIZE); + PATH_LAST_POSITION (path)++; + n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, + (char *)&unfm_ptr); + } else { + /* Paste into last indirect item of an object. 
*/ + n_retval = reiserfs_paste_into_item(th, path, &end_key, + (char *)&unfm_ptr, UNFM_P_SIZE); + } + if ( n_retval ) { + return n_retval; + } + + // note: from here there are two keys which have matching first + // three key components. They only differ by the fourth one. + + + /* Set the key to search for the direct items of the file */ + make_cpu_key (&end_key, inode, max_reiserfs_offset (inode), TYPE_DIRECT, 4); + + /* Move bytes from the direct items to the new unformatted node + and delete them. */ + while (1) { + int item_len, first_direct; + + /* end_key.k_offset is set so, that we will always have found + last item of the file */ + if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) + reiserfs_panic (sb, "PAP-14050: direct2indirect: " + "direct item (%k) not found", &end_key); + p_le_ih = PATH_PITEM_HEAD (path); +#ifdef CONFIG_REISERFS_CHECK + if (!is_direct_le_ih (p_le_ih)) + reiserfs_panic (sb, "vs-14055: direct2indirect: " + "direct item expected, found %h", p_le_ih); +#endif + if ((le_ih_k_offset (p_le_ih) & (n_blk_size - 1)) == 1) + first_direct = 1; + else + first_direct = 0; + item_len = le16_to_cpu (p_le_ih->ih_item_len); + + /* we only send the unbh pointer if the buffer is not up to date. 
+ ** this avoids overwriting good data from writepage() with old data + ** from the disk or buffer cache + */ + if (buffer_uptodate(unbh) || Page_Uptodate(unbh->b_page)) { + up_to_date_bh = NULL ; + } else { + up_to_date_bh = unbh ; + } + n_retval = reiserfs_delete_item (th, path, &end_key, inode, + up_to_date_bh) ; + + if (first_direct && item_len == n_retval) + // done: file does not have direct items anymore + break; + + } + + inode->u.reiserfs_i.i_first_direct_byte = U32_MAX; + + return 0; +} + + +/* stolen from fs/buffer.c */ +void reiserfs_unmap_buffer(struct buffer_head *bh) { + if (buffer_mapped(bh)) { + if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { + BUG() ; + } + mark_buffer_clean(bh) ; + wait_on_buffer(bh) ; + // clear_bit(BH_Uptodate, &bh->b_state) ; + clear_bit(BH_Mapped, &bh->b_state) ; + clear_bit(BH_Req, &bh->b_state) ; + clear_bit(BH_New, &bh->b_state) ; + } +} + +static void +unmap_buffers(struct page *page) { + struct buffer_head *bh ; + struct buffer_head *head ; + struct buffer_head *next ; + + /* starting with brute force method, get all the buffers in + ** the page. Since blocksize == 4k == pagesize, this is not + ** a performance hit on intel. + */ + if (page) { + if (page->buffers) { + head = page->buffers ; + bh = head ; + do { + next = bh->b_this_page ; + reiserfs_unmap_buffer(bh) ; + bh = next ; + } while (bh != head) ; + } + } +} + +/* this first locks inode (neither reads nor sync are permitted), + reads tail through page cache, insert direct item. When direct item + inserted successfully inode is left locked. Return value is always + what we expect from it (number of cut bytes). But when tail remains + in the unformatted node, we set mode to SKIP_BALANCING and unlock + inode */ +int indirect2direct (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + struct page *page, + struct path * p_s_path, /* path to the indirect item. 
*/ + struct cpu_key * p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ + loff_t n_new_file_size, /* New file size. */ + char * p_c_mode) +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + struct item_head s_ih; + unsigned long n_block_size = p_s_sb->s_blocksize; + char * tail; + int tail_len, round_tail_len; + loff_t pos, pos1; /* position of first byte of the tail */ + struct cpu_key key; + + p_s_sb->u.reiserfs_sb.s_indirect2direct ++; + + *p_c_mode = M_SKIP_BALANCING; + + /* store item head path points to. */ + copy_item_head (&s_ih, PATH_PITEM_HEAD(p_s_path)); + + tail_len = (n_new_file_size & (n_block_size - 1)); + if (!old_format_only (p_s_sb)) + round_tail_len = ROUND_UP (tail_len); + else + round_tail_len = tail_len; + + pos = le_ih_k_offset (&s_ih) - 1 + (le16_to_cpu (s_ih.ih_item_len) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; + pos1 = pos; + + // we are protected by i_sem. The tail can not disapper, not + // append can be done either + // we are in truncate or packing tail in file_release + + tail = (char *)kmap(page) ; /* this can schedule */ + + if (path_changed (&s_ih, p_s_path)) { + /* re-search indirect item */ + if ( search_for_position_by_key (p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ) + reiserfs_panic(p_s_sb, "PAP-5520: indirect2direct: " + "item to be converted %k does not exist", p_s_item_key); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); +#ifdef CONFIG_REISERFS_CHECK + pos = le_ih_k_offset (&s_ih) - 1 + + (le16_to_cpu (s_ih.ih_item_len) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; + if (pos != pos1) + reiserfs_panic (p_s_sb, "vs-5530: indirect2direct: " + "tail position changed while we were reading it"); +#endif + } + + + /* Set direct item header to insert. 
*/ + make_le_item_head (&s_ih, 0, inode_items_version (p_s_inode), pos1 + 1, + TYPE_DIRECT, round_tail_len, 0xffff/*ih_free_space*/); + tail = tail + (pos & (PAGE_CACHE_SIZE - 1)) ; + + PATH_LAST_POSITION(p_s_path)++; + + key = *p_s_item_key; + set_cpu_key_k_type (&key, TYPE_DIRECT); + key.key_length = 4; + /* Insert tail as new direct item in the tree */ + if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, + tail ? tail : NULL) < 0 ) { + /* No disk memory. So we can not convert last unformatted node + to the direct item. In this case we used to adjust + indirect items's ih_free_space. Now ih_free_space is not + used, it would be ideal to write zeros to corresponding + unformatted node. For now i_size is considered as guard for + going out of file size */ + kunmap(tail) ; + return n_block_size - round_tail_len; + } + kunmap(tail) ; + + /* this will invalidate all the buffers in the page after + ** pos1 + */ + unmap_buffers(page) ; + + // note: we have now the same as in above direct2indirect + // conversion: there are two keys which have matching first three + // key components. They only differ by the fouhth one. + + /* We have inserted new direct item and must remove last + unformatted node. */ + p_s_inode->i_blocks += (p_s_sb->s_blocksize / 512); + *p_c_mode = M_CUT; + + /* we store position of first direct item in the in-core inode */ + //mark_file_with_tail (p_s_inode, pos1 + 1); + p_s_inode->u.reiserfs_i.i_first_direct_byte = pos1 + 1; + + return n_block_size - round_tail_len; +} + + + diff -u -r --new-file linux/fs/reiserfs/utils/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/Makefile --- linux/fs/reiserfs/utils/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/Makefile Thu Sep 14 19:35:30 2000 @@ -0,0 +1,79 @@ +# +# Warning : reiserfsck and some others utils are broken currently. 
+# We hope that it will be ready not later reiserfs-3.6.8 +# +.EXPORT_ALL_VARIABLES: + +TOPDIR = $(shell pwd) +# separate utils package detection +SEPARATE_UTILS_PACKAGE=$(shell [ -d reiserfs-src ] && echo yes) +TMPBINDIR = $(TOPDIR)/bin +INCLUDEDIR = $(TOPDIR)/include + +ifeq ($(SEPARATE_UTILS_PACKAGE),yes) + INCLUDEDIR2 = /usr/include/linux +else + INCLUDEDIR2 = $(TOPDIR)/../../../include/linux +endif + +SBIN = /sbin +MANDIR = /usr/man/man8 + +IDIRS = -I$(INCLUDEDIR) -I$(INCLUDEDIR2) -I- + +#CFLAGS = -Wall -c -O -pg -g +#CFLAGS = -Wall -c -O2 -g -pg $(IDIRS) +CFLAGS = -Wall -c -O2 -g $(IDIRS) +#CFLAGS = -Wall -c -g $(IDIRS) +LFLAGS = -g -L$(TMPBINDIR) + + +# FIXME: path to kernel reiserfs .c files +ifeq ($(SEPARATE_UTILS_PACKAGE),yes) + REISERFS_KERNEL_SOURCE = $(TOPDIR)/reiserfs-src +else + + REISERFS_KERNEL_SOURCE = $(TOPDIR)/.. +endif + +REISERFS_LIB = ../lib + +#ALL_SUB_DIRS = lib obj mkreiserfs debugreiserfs emu fsck +ALL_SUB_DIRS = mkreiserfs debugreiserfs +# emu fsck +ALL_PROGS = mkreiserfs debugreiserfs + +all: + mkdir -p bin + set -e; for i in $(ALL_SUB_DIRS); do $(MAKE) -C $$i ; done + +dep: + set -e; for i in $(ALL_SUB_DIRS); do $(MAKE) -C $$i dep ; done + +clean: + set -e; for i in $(ALL_SUB_DIRS); do $(MAKE) -C $$i clean ; done + +install: + for i in $(ALL_PROGS); do $(MAKE) -C $$i install ; done + +uninstall: + set -e; for i in $(ALL_PROGS); do $(MAKE) -C $$i uninstall ; done + +tags: + :> TAGS +# cd mkreiserfs; etags *.[ch] ../lib/*.c ../include/*.h ../../*.c ../../../../include/linux/reiserfs*h +# cd fsck; etags *.[ch] ../lib/*.c ../include/*.h ../../../../include/linux/reiserfs*h +# cd print_disk_layout; etags *.[ch] ../lib/*.c ../include/*.h ../../../../include/linux/reiserfs*h +# cd lib; etags *.[ch] ../lib/*.c ../include/*.h ../../../../include/linux/reiserfs*h +# cd include; etags *.[ch] ../lib/*.c ../include/*.h ../../../../include/linux/reiserfs*h +# cd emu; etags *.[ch] ../lib/*.c ../include/*.h ../../../../include/linux/reiserfs*h +# cd 
obj; etags ../../*.[ch] ../../../../include/linux/reiserfs*h + rm -f TAGS; etags ../*.[ch] ../../../include/linux/reiserfs*h include/*.h lib/*.c debugreiserfs/*.c mkreiserfs/*.c emu/*.c fsck/*.c + + + + + + + + diff -u -r --new-file linux/fs/reiserfs/utils/README v2.4.0-test8/linux/fs/reiserfs/utils/README --- linux/fs/reiserfs/utils/README Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/README Fri Aug 25 18:24:32 2000 @@ -0,0 +1,31 @@ +This contains programs to create (mkreiserfs) and repair +(reiserfsck) reiserfs file system on a block device. + +IMPORTANT : reiserfsck and some other utils are broken now, + and will be ready later. + +Building is simple: say +make + + +After that you have binaries in bin/. + +You can `make install` to copy programs to /sbin +and man pages to /usr/man/man8. + + +NOTE: do not store data you care about on a reiserfs partition. It + is quite unstable yet. Please, report any weird behaviour to + reiser@idiom.com. + + +FSCK NOTE: + Reiserfs stores data on disk in quite a complicated manner. This means + that reiserfsck is not a simple program. It may + still contain bugs. The perfect way to use it is to back up the + target partition first (I am sorry, if you have desire, time and disk + space). If reiserfs fails, it would be useful to use the backed-up copy + of the partition for debugging. 
+ +Thanks a lot + diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/Makefile --- linux/fs/reiserfs/utils/benchmarks/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/Makefile Thu Sep 14 23:19:25 2000 @@ -0,0 +1,3 @@ +reiser_fract_tree: reiser_fract_tree.c + gcc -g reiser_fract_tree.c -o reiser_fract_tree -lm -Wall + diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/README v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/README --- linux/fs/reiserfs/utils/benchmarks/README Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/README Fri Sep 15 19:50:46 2000 @@ -0,0 +1,33 @@ + + MONGO.SH BENCHMARK. +To run the mongo benchmark please use the following : + +# run_mongo <device> + +The benchmark will be performed on the given device with +reiserfs and ext2. Then results will be compared. + +The results directory : ./results +The comparison *.html and *.txt files in ./results/html + +Warning : All info on the device will be erased. + +------------------------------------------------------ + +Mongo.sh description : + + mongo.sh <filesystem> <device> <test_dir> <log> <repeat> + + for example : + mongo.sh reiserfs /dev/hda5 /testfs log 1 + +Be careful : + /dev/hda5 - test device and all info on it will be erased. + It should be at least 500 Mb in size. + + /testfs - mount-point directory + + log - name prefix for results file. + + diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/mongo.sh v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo.sh --- linux/fs/reiserfs/utils/benchmarks/mongo.sh Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo.sh Fri Sep 15 17:40:43 2000 @@ -0,0 +1,732 @@ +#!/bin/bash + +if [ $# -lt 5 ] +then + echo + echo "Usage: mongo.sh <filesystem> <device> <mount_point> <log> <repeat>" + echo + echo "<filesystem> - the name of filesystem [reiserfs|ext2]" + echo "<device> - the device for benchmark (e.g. 
/dev/hda9)" + echo "<mount_point> - mount-point for the filesystem (e.g. /mnt/testfs)" + echo "<log> - the name prefix for file of benchmark results" + echo "<repeat> - the number of benchmark repetitions" + echo + echo examples: + echo mongo.sh reiserfs /dev/hda9 /mnt/testfs journaling_on_results 1 + echo mongo.sh ext2 /dev/hda9 /mnt/testfs ext2_results 1 + echo + echo ............................................... + echo The results will be put in ./results directory + echo + exit +fi + +#...You can skip running part of the benchmark by setting "0" instead of "1": +#---------------------------------------------------------------------------- +execute_single_process_phase=1 + + execute_copying_single=1 + execute_scanning_single=1 + execute_directory_stats_single=1 + execute_deleting_single=1 + +execute_multi_process_phase=1 + + execute_copying_multi=1 + execute_scanning_multi=1 + execute_directory_stats_multi=1 + execute_deleting_multi_parallel=0 + execute_deleting_multi=1 + +#-------- reiser_fract_tree parameters---------------- +# defines +x1mb=$[1024*1024] +x5mb=$[5 * $x1mb] +x50mb=$[50 * $x1mb] +x100mb=$[100 * $x1mb] + +# Total amount of bytes in all files on test partition +small_bytes=$x5mb +medium_bytes=$x50mb +large_bytes=$x100mb + +# Median size of files in bytes for first tree to create +small_size=100 +medium_size=1000 +large_size=100000 + +#Keep the largest file to one fifth (100 million bytes) of the total tree size. +max_file_size=100000000 +#Yuri Shevchuk says that 0 is the median size in real life, so I believe him. +median_dir_nr_files=0 +#This should be larger, change once done testing. + +bytes_to_consume=10000000 + +median_file_size=100 +max_file_size=1000000 + +median_dir_nr_files=100 +max_directory_nr_files=10000 + +median_dir_branching=0 +max_dir_branching=1 + +#This should be varying, someday.... 
+write_buffer_size=4096 + +numb_of_bytes=($small_bytes $medium_bytes $large_bytes) +size_of_files=($small_size $medium_size $large_size) +#level_of_tree=(2 2 2) +#fract_of_tree=(5 5 5) +reiser_fract_tree_rep_counter=3 +total_params=${#numb_of_bytes[*]} + + +#... Set working directories +#----------------------------- +TOPDIR=$(pwd) +RESDIR=$TOPDIR/results +HTMLDIR=$RESDIR/html + +FILESYSTEM=$1 +DEVICE=$2 +TESTDIR=$3 +LOGFILE=$RESDIR/$4 +LOGFILE2=${LOGFILE}_table +LOGFILE3=${LOGFILE}.tbl +REPETITIONS=$5 + + +TMPFILE=$RESDIR/mongo_tmp + +rep_left=$REPETITIONS + +READIT=$TOPDIR/mongo_read + + +#... Make directories for results +#-------------------------------- +if ! [ -d $RESDIR ] +then + echo Creating dir: $RESDIR + mkdir $RESDIR +fi +if ! [ -d $HTMLDIR ] +then + echo Creating dir: $HTMLDIR + mkdir $HTMLDIR +fi + +#... Compile *.c files if it is necessary +#---------------------------------------- +if ! [ -f reiser_fract_tree ] +then + echo Compiling reiser_fract_tree.c ... + gcc reiser_fract_tree.c -o reiser_fract_tree -lm +fi + +if ! [ -f mongo_read ] +then + echo Compiling mongo_read.c ... + gcc mongo_read.c -o mongo_read +fi + +if ! [ -f mongo_compare ] +then + echo Compiling mongo_compare.c ... + gcc mongo_compare.c -o mongo_compare +fi + +#... Check the command string parameters +#--------------------------------------- +if [ $FILESYSTEM = "reiserfs" ] || [ $FILESYSTEM = "ext2" ] +then + echo +else + echo "mongo.sh: not valid filesystem name: $FILESYSTEM" + echo "Usage: mongo.sh <filesystem> <device> <mount_point> <log> <repeat>" + exit 1 +fi + +if ! [ -b $DEVICE ] +then + echo "mongo.sh: not valid device: $DEVICE" + echo "Usage: mongo.sh <filesystem> <device> <mount_point> <log> <repeat>" + exit 1 +fi + + +#... 
------------------The functions definitions -------------------- + +function copy() { + cp -r $1 $2 +} + +function remove() { + rm -rf $@ +} + +function make_fsys() { + + echo "FILESYSTEM=$FILESYSTEM" >> $LOGFILE + + umount $TESTDIR + + if [ $FILESYSTEM = "reiserfs" ] + then + echo y | mkreiserfs $DEVICE; echo + mount -t reiserfs $DEVICE $TESTDIR + fi + + if [ $FILESYSTEM = "ext2" ] + then + echo y | mke2fs $DEVICE + mount -t ext2 $DEVICE $TESTDIR + fi +} + +function cmd_create() { + $TOPDIR/reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size $TESTDIR +} + +function mongo_single_process() { + + # make and mount the file system + make_fsys; + + echo + echo "mongo_single_process, the_set_of_param.N=$par_set_n of $total_params" + echo "Results in file : $LOGFILE" + echo + + echo 0.Creating files of median size $median_file_size bytes ... + #echo "********* Phase 0: Creating $files_total files of base size $median_file_size in $dirs_total dirs *********" >> $LOGFILE + echo "********* Phase 0: Creating files of median size $median_file_size bytes *********" >> $LOGFILE + i=0; total=0 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + #echo "reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size" + echo -n "Creating : " + echo -n "Creating : " >> $LOGFILE + t=`(time -p $TOPDIR/reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size $TESTDIR/testdir-$i 0) 2>&1` + creating=`(echo $t | (cut -d" " -f2) 2>&1)` + echo "$creating sec." >> $LOGFILE + echo "$creating sec." + total=`export total; export creating; perl -e 'print $ENV{'total'} + $ENV{'creating'}'` + i=$[ $i + 1 ] + done + echo "total creating time: $total sec." + echo "total creating time: $total sec." 
>> $LOGFILE + creating=$total + + u=`du -sk $TESTDIR 2>&1` + used=`(echo $u | (cut -d" " -f1) 2>&1)` + echo "Used disk space : $used KB" + echo "Used disk space : $used KB" >> $LOGFILE + + sync + + if [ $execute_copying_single -eq 1 ] + then + echo 1.Copying files... + echo "********* Phase I: Copying files *********" >> $LOGFILE + i=0;total=0; + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + echo -n "Copying : " + echo -n "Copying : " >> $LOGFILE + t=`(time -p copy $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i ) 2>&1` + copying=`(echo $t | (cut -d' ' -f2) 2>&1)` + echo "$copying sec.">> $LOGFILE + echo "$copying sec." + total=`export total; export copying; perl -e 'print $ENV{'total'} + $ENV{'copying'}'` + i=$[ $i + 1 ] + done + echo "total copying time: $total sec." + echo "total copying time: $total sec." >> $LOGFILE + copying=$total + + files_total=`find $TESTDIR -type f | wc -l` + dirs_total=`find $TESTDIR -type d | wc -l` + + u=`du -sk $TESTDIR 2>&1` + used2=`(echo $u | (cut -d" " -f1) 2>&1)` + + echo "Total files : $files_total" + echo "Total dirs : $dirs_total" + + echo "Used disk space (du): $used2 KB" + echo "Used disk space (du): $used2 KB" >> $LOGFILE + + sync + fi + + + df >> $LOGFILE + + if [ $execute_scanning_single -eq 1 ] + then + echo 2.Scanning each file... + echo "********* Phase II: Scanning each file *********" >> $LOGFILE + echo -n "Scanning : " + echo -n "Scanning : " >> $LOGFILE + #t=`(time -p find $TESTDIR -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) 2>&1` + t=`(time -p find $TESTDIR -type f |xargs $READIT ) 2>&1` + scanning=`(echo $t | (cut -d' ' -f2) 2>&1)` + echo "$scanning sec." >> $LOGFILE + echo "$scanning sec." + sync + fi + + + if [ $execute_directory_stats_single -eq 1 ] + then + echo 3.Recursive directory stats... 
+ echo "********* Phase III: Recursive directory stats *********" >> $LOGFILE + echo -n "Stats time : " + echo -n "Stats time : " >> $LOGFILE + #t=`(time -p find $TESTDIR -exec ls -l {} > /dev/null \; ) 2>&1` + t=`(time -p find $TESTDIR | xargs ls -l > /dev/null ) 2>&1` + (time -p du -s $TESTDIR >> $LOGFILE ) >> $LOGFILE 2>&1 + stating=`(echo $t | (cut -d' ' -f2) 2>&1)` + echo "$stating sec." >> $LOGFILE + echo "$stating sec." + sync + fi + + if [ $execute_deleting_single -eq 1 ] + then + echo 4.Deleting remaining files... + echo "********* Phase VIIII: Deleting remaining files ****" >> $LOGFILE + echo -n "Deleting : " + echo -n "Deleting : " >> $LOGFILE + t=`(time -p remove $TESTDIR/* ) 2>&1` + deleting=`(echo $t | (cut -d' ' -f2) 2>&1)` + echo "$deleting sec.">> $LOGFILE + echo "$deleting sec." + sync + fi + + echo >> $LOGFILE2 + echo "MONGO_SINGLE_PROCESS BENCHMARK RESULTS (time in sec.)" >> $LOGFILE2 + echo " FILESYSTEM=$FILESYSTEM" >> $LOGFILE2 + echo " parameters: files=$files_total, base_size=$median_file_size bytes, dirs=$dirs_total" >> $LOGFILE2 + echo -e "-----------------------------------------------------" >> $LOGFILE2 + echo -e "Create\tCopy\tRead\tStats\tDelete" >> $LOGFILE2 + echo -e " time \ttime\ttime\ttime \t time " >> $LOGFILE2 + echo -e "-----------------------------------------------------" >> $LOGFILE2 + echo -e "$creating\t$copying\t$scanning\t$stating\t$deleting" >> $LOGFILE2 + echo -e "-----------------------------------------------------" >> $LOGFILE2 + echo "The size of files tree : " >> $LOGFILE2 + echo " after create = $used kb" >> $LOGFILE2 + echo " after copy = $used2 kb" >> $LOGFILE2 + echo >> $LOGFILE2 + + echo >> $LOGFILE3 + echo "MONGO_SINGLE_PROCESS " >> $LOGFILE3 + echo "parameters: " >> $LOGFILE3 + echo "files=$files_total " >> $LOGFILE3 + echo "base_size=$median_file_size bytes " >> $LOGFILE3 + echo "dirs=$dirs_total " >> $LOGFILE3 + echo >> $LOGFILE3 + + echo "FSYS=$FILESYSTEM " >> $LOGFILE3 + echo "(time in sec.) 
" >> $LOGFILE3 + echo "Create : $creating" >> $LOGFILE3 + echo "Copy : $copying " >> $LOGFILE3 + echo "Read : $scanning" >> $LOGFILE3 + echo "Stats : $stating " >> $LOGFILE3 + echo "Delete : $deleting" >> $LOGFILE3 + echo >> $LOGFILE3 + + + echo "******* The end of mongo_single_process *******" >> $LOGFILE + +} + +function del_tmpfiles() { + + i=0; + while [ $i -le $reiser_fract_tree_rep_counter ] + do + if [ -f $TMPFILE-$i ] + then + rm $TMPFILE-$i + fi + i=$[ $i + 1 ] + done + + if [ -f $TMPFILE ] + then + rm $TMPFILE + fi + +} + +function mongo_multi_process() { + + # make and mount the file system + make_fsys; + + echo + echo "mongo_multi_process, rep.N=$rep_n of $REPETITIONS, the_set_of_param.N=$par_set_n of $total_params" + echo "Results in file : $LOGFILE" + echo + + #echo 0..Creating $files_total files of base size $median_file_size bytes in $dirs_total dirs ... + echo 0.Creating files of median size $median_file_size bytes ... + #echo "********* Phase 0: Creating $files_total files of base size $median_file_size in $dirs_total dirs *********" >> $LOGFILE + echo "********* Phase 0: Creating files of median size $median_file_size bytes *********" >> $LOGFILE + i=1 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + #echo "reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size" >> $LOGFILE + #echo "reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size" + echo "reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size" >> $LOGFILE + echo "reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size" + #(time -p $TOPDIR/reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size 0) > $TMPFILE-$i 2>&1 & + 
(time -p $TOPDIR/reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size $TESTDIR/testdir-$i 0) > $TMPFILE-$i 2>&1 & + i=$[ $i + 1 ] + done + i=$reiser_fract_tree_rep_counter + creation_cmd="(time -p $TOPDIR/reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size 0) >> $LOGFILE 2>&1"; + #echo "reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size" >> $LOGFILE + #echo "reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size" + echo "reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size" >> $LOGFILE + echo "reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size" + + #(time -p $TOPDIR/reiser_fract_tree $files $level $median_file_size $inverse $TESTDIR/testdir-$i $write_buffer_size 0) > $TMPFILE-$i 2>&1 + (time -p $TOPDIR/reiser_fract_tree $bytes_to_consume $median_file_size $max_file_size $median_dir_nr_files $max_directory_nr_files $median_dir_branching $max_dir_branching $write_buffer_size $TESTDIR/testdir-$i 0) > $TMPFILE-$i 2>&1 + sleep 30 + + total=0 + i=1 + while [ $i -le $reiser_fract_tree_rep_counter ] + do + echo -n "Creating : " + echo -n "Creating : " >> $LOGFILE + creating=`(cat $TMPFILE-$i | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$creating sec." >> $LOGFILE + echo "$creating sec." + total=`export total; export creating; perl -e 'print $ENV{'total'} + $ENV{'creating'}'` + i=$[ $i + 1 ] + done + echo "total creating time: $total sec." + echo "total creating time: $total sec." 
>> $LOGFILE + creating=$total + + del_tmpfiles; + + u=`du -sk $TESTDIR 2>&1` + used=`(echo $u | (cut -d" " -f1) 2>&1)` + echo "Used disk space : $used KB" + echo "Used disk space : $used KB" >> $LOGFILE + + sync + + if [ $execute_copying_multi -eq 1 ] + then + echo 1..Copying files... + echo "********* Phase I: Copying files *********" >> $LOGFILE + i=1 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + (time -p copy $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i ) > $TMPFILE-$i 2>&1 & + i=$[ $i + 1 ] + done + i=$reiser_fract_tree_rep_counter + (time -p copy $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i ) > $TMPFILE-$i 2>&1 + + sleep 30 + + total=0 + i=1 + while [ $i -le $reiser_fract_tree_rep_counter ] + do + echo -n "Copying : " + echo -n "Copying : " >> $LOGFILE + copying=`(cat $TMPFILE-$i | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$copying sec." >> $LOGFILE + echo "$copying sec." + total=`export total; export copying; perl -e 'print $ENV{'total'} + $ENV{'copying'}'` + i=$[ $i + 1 ] + done + echo "total copying time: $total sec." + echo "total copying time: $total sec." >> $LOGFILE + copying=$total + del_tmpfiles; + + files_total=`find $TESTDIR -type f | wc -l` + dirs_total=`find $TESTDIR -type d | wc -l` + + u=`du -sk $TESTDIR 2>&1` + used2=`(echo $u | (cut -d" " -f1) 2>&1)` + echo "Total files : $files_total" + echo "Total dirs : $dirs_total" + echo "Used disk space (du) : $used2 KB" + echo "Used disk space (du) : $used2 KB" >> $LOGFILE + sync + fi + + df >> $LOGFILE + + if [ $execute_scanning_multi -eq 1 ] + then + echo 2.a..Scanning each file--- parallel... 
+ echo "********* Phase II-a: Scanning each file--- parallel *********" >> $LOGFILE + if [ $execute_copying_multi -eq 1 ] + then + i=1 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + #(time -p find $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) > $TMPFILE-$i 2>&1 & + (time -p find $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i -type f | xargs $READIT ) > $TMPFILE-$i 2>&1 & + i=$[ $i + 1 ] + done + i=$reiser_fract_tree_rep_counter + #(time -p find $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) > $TMPFILE-$i 2>&1 + (time -p find $TESTDIR/testdir-$i $TESTDIR/testdir-$i-$i -type f |xargs $READIT ) > $TMPFILE-$i 2>&1 + else + i=1 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + #(time -p find $TESTDIR/testdir-$i -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) > $TMPFILE-$i 2>&1 & + (time -p find $TESTDIR/testdir-$i -type f | xargs $READIT ) > $TMPFILE-$i 2>&1 & + i=$[ $i + 1 ] + done + i=$reiser_fract_tree_rep_counter + #(time -p find $TESTDIR/testdir-$i -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) > $TMPFILE-$i 2>&1 + (time -p find $TESTDIR/testdir-$i -type f | xargs $READIT ) > $TMPFILE-$i 2>&1 + fi + sleep 30 + + total=0 + i=1 + while [ $i -le $reiser_fract_tree_rep_counter ] + do + echo -n "Scanning_a : " + echo -n "Scanning_a : " >> $LOGFILE + scanning=`(cat $TMPFILE-$i | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$scanning sec." >> $LOGFILE + echo "$scanning sec." + total=`export total; export scanning; perl -e 'print $ENV{'total'} + $ENV{'scanning'}'` + i=$[ $i + 1 ] + done + echo "total scanning_a time: $total sec." + echo "total scanning_a time: $total sec." >> $LOGFILE + scanning=$total + del_tmpfiles; + sync + + echo 2.b..Scanning each file --- serial after parallel... 
+ echo "********* Phase II-b: Scanning each file --- serial after parallel *********" >> $LOGFILE + #(time -p find $TESTDIR -type f -exec $READIT {} $write_buffer_size >& /dev/null \; ) > $TMPFILE 2>&1 + (time -p find $TESTDIR -type f |xargs $READIT {} ) > $TMPFILE 2>&1 + sleep 30 + + echo -n "Scanning_b : " + echo -n "Scanning_b : " >> $LOGFILE + scanning_b=`(cat $TMPFILE | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$scanning_b sec." >> $LOGFILE + echo "$scanning_b sec." + del_tmpfiles; + sync + fi + + if [ $execute_directory_stats_multi -eq 1 ] + then + echo 3..Recursive directory stats... + echo "********* Phase III: Recursive directory stats *********" >> $LOGFILE + #(time -p find $TESTDIR -exec ls -l {} > /dev/null \; ) > $TMPFILE 2>&1 + (time -p find $TESTDIR |xargs ls -l > /dev/null ) > $TMPFILE 2>&1 + #(time -p du -s $TESTDIR >> $LOGFILE ) >> $LOGFILE 2>&1 + sleep 30 + echo -n "Stats time : " + echo -n "Stats time : " >> $LOGFILE + stating=`(cat $TMPFILE | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$stating sec." >> $LOGFILE + echo "$stating sec." + del_tmpfiles; + sync + fi + + if [ $execute_deleting_multi_parallel -eq 1 ] + then + echo 4..Deleting remaining files parallel... + echo "********* Phase VIIII: Deleting remaining files ****" >> $LOGFILE + i=1 + while [ $i -lt $reiser_fract_tree_rep_counter ] + do + (time -p remove $TESTDIR/testdir-$i ) > $TMPFILE-$i 2>&1 & + i=$[ $i + 1 ] + done + i=$reiser_fract_tree_rep_counter + (time -p remove $TESTDIR/testdir-$i ) > $TMPFILE-$i 2>&1 & + + sleep 30 + + total=0 + i=1 + while [ $i -le $reiser_fract_tree_rep_counter ] + do + echo -n "Deleting : " + echo -n "Deleting : " >> $LOGFILE + deleting=`(cat $TMPFILE-$i | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$deleting sec." >> $LOGFILE + echo "$deleting sec." + total=`export total; export deleting; perl -e 'print $ENV{'total'} + $ENV{'deleting'}'` + i=$[ $i + 1 ] + done + echo "total deleting time: $total sec." + echo "total deleting time: $total sec." 
>> $LOGFILE + deleting=$total + del_tmpfiles; + sync + + fi + + if [ $execute_deleting_multi -eq 1 ] + then + echo 4..Deleting remaining files... + echo "********* Phase VIIII: Deleting remaining files ****" >> $LOGFILE + (time -p remove $TESTDIR/* ) > $TMPFILE 2>&1 + echo -n "Deleting : " + echo -n "Deleting : " >> $LOGFILE + deleting=`(cat $TMPFILE | grep "real" | (cut -d" " -f2) 2>&1)` + echo "$deleting sec." >> $LOGFILE + echo "$deleting sec." + del_tmpfiles; + sync + + fi + + echo >> $LOGFILE2 + echo "MONGO_MULTI_PROCESS BENCHMARK RESULTS (time in sec.)" >> $LOGFILE2 + echo " FILESYSTEM=$FILESYSTEM" >> $LOGFILE2 + echo " parameters: files=$files_total, base_size=$median_file_size bytes, dirs=$dirs_total" >> $LOGFILE2 + echo -e "----------------------------------------------------" >> $LOGFILE2 + echo -e "Create\tCopy\tRead \tRead \tStats\tDelete" >> $LOGFILE2 + echo -e " time \ttime\tparall\tserial\ttime \t time " >> $LOGFILE2 + echo -e "----------------------------------------------------" >> $LOGFILE2 + echo -e "$creating\t$copying\t$scanning\t$scanning_b\t$stating\t$deleting" >> $LOGFILE2 + echo -e "----------------------------------------------------" >> $LOGFILE2 + echo "The size of files tree : " >> $LOGFILE2 + echo " after create = $used kb" >> $LOGFILE2 + echo " after copy = $used2 kb" >> $LOGFILE2 + echo >> $LOGFILE2 + + + echo >> $LOGFILE3 + + echo "MONGO_MULTI_PROCESS " >> $LOGFILE3 + echo "parameters: " >> $LOGFILE3 + echo "files=$files_total " >> $LOGFILE3 + echo "base_size=$median_file_size bytes " >> $LOGFILE3 + echo "dirs=$dirs_total " >> $LOGFILE3 + echo >> $LOGFILE3 + + echo "FSYS=$FILESYSTEM " >> $LOGFILE3 + echo "(time in sec.) 
" >> $LOGFILE3 + echo "Create : $creating" >> $LOGFILE3 + echo "Copy : $copying " >> $LOGFILE3 + echo "Read parallel : $scanning" >> $LOGFILE3 + echo "Read serial : $scanning_b" >> $LOGFILE3 + echo "Stats : $stating " >> $LOGFILE3 + echo "Delete : $deleting" >> $LOGFILE3 + echo >> $LOGFILE3 + + + del_tmpfiles; + + echo "******* The end of mongo_multi_process *******" >> $LOGFILE + +} + +function set_params() { + + bytes=$1 + size=$2 + #level=$3 + #inverse=$4 + + bytes_to_consume=$1 + median_file_size=$2 + #files=10 + #files_total=$[ $files * $reiser_fract_tree_rep_counter ] + #dirs_total=1 + #i=0; + #while [ $i -lt $level ] + #do + # dirs_total=$[ $dirs_total * 10 ] + # i=$[ $i + 1 ] + #done + #dirs_total=$[ $dirs_total * $reiser_fract_tree_rep_counter] +} + + +function run_mongo_single() { + + echo " ***** Starting mongo_single_process, rep.N=$rep_n *****" + echo " ***** Starting mongo_single_process, rep.N=$rep_n *****" >> $LOGFILE + + k=0 + while [ $k -lt ${#numb_of_bytes[*]} ] + do + set_params ${numb_of_bytes[$k]} ${size_of_files[$k]} ${level_of_tree[$k]} ${fract_of_tree[$k]}; + par_set_n=$[ $k + 1 ] + mongo_single_process; + k=$[ $k + 1 ] + done + + echo " ***** Finished mongo_single_process, repetitions left:$rep_left *****" + echo " ***** Finished mongo_single_process, repetitions left:$rep_left *****" >> $LOGFILE +} + +function run_mongo_multi() { + + echo " ***** Starting mongo_multi_process, rep.N=$rep_n *****" + echo " ***** Starting mongo_multi_process, rep.N=$rep_n *****" >> $LOGFILE + + k=0 + while [ $k -lt ${#numb_of_bytes[*]} ] + do + set_params ${numb_of_bytes[$k]} ${size_of_files[$k]} ${level_of_tree[$k]} ${fract_of_tree[$k]}; + par_set_n=$[ $k + 1 ] + mongo_multi_process; + k=$[ $k + 1 ] + done + + echo " ***** Finished mongo_multi_process, repetitions left:$rep_left *****" + echo " ***** Finished mongo_multi_process, repetitions left:$rep_left *****" >> $LOGFILE + +} + +#...------------------------START MONGO 
BENCHMARK--------------------------- + +echo "***** Starting mongo.sh benchmark ******" +echo "***** Starting mongo.sh benchmark ******" >> $LOGFILE + +rep_n=0 +while [ $rep_left -gt 0 ] +do + rep_n=$[ $rep_n + 1 ] + rep_left=$[ $rep_left - 1 ] + + if [ $execute_single_process_phase -eq 1 ] + then + run_mongo_single; + fi + + if [ $execute_multi_process_phase -eq 1 ] + then + run_mongo_multi; + fi +done + +umount $TESTDIR + +echo "***** Finished a mongo.sh benchmark, repetitions:$REPETITIONS *****" +echo "***** Finished a mongo.sh benchmark, repetitions:$REPETITIONS *****" >> $LOGFILE +echo "Results in file : $LOGFILE" +sync diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/mongo_compare.c v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo_compare.c --- linux/fs/reiserfs/utils/benchmarks/mongo_compare.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo_compare.c Tue Dec 14 20:50:43 1999 @@ -0,0 +1,205 @@ +#include <stdio.h> +#include <ctype.h> +#include <string.h> + + +char time_str1[50]; +char time_str2[50]; +char name_str1[50]; +char tmp_str[20][100]; + +char out1[256]; +char out2[256]; + +FILE *f1; +FILE *f2; +FILE *f3; +FILE *f4; + +void write_html_head(FILE *fp); +void write_html_end(FILE *fp); + +char head_str[]="\n \ +<!doctype html public \"-//w3c//dtd html 4.0 transitional//en\">\n \ +<html>\n \ +<head>\n \ + <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \ + <meta name=\"GENERATOR\" content=\"Mozilla/4.5 [en] (X11; I; Linux 2.2.7 i586) [Netscape]\">\n \ +</head>\n \ +<body>\n \ +"; +/* +<tt></tt> \n \ +<table BORDER NOSAVE >\n \ +<tr BGCOLOR=\"#CCFFFF\" NOSAVE>\n \ +<td NOSAVE> \n \ +"; +*/ + +char end_str[]="\n \ +</table> \n \ +<tt></tt> \n \ +</body> \n \ +</html> \n \ +"; + +main(int argc, char **argv) +{ + float n1, n2, ratio; + char *p, *p1, *p2; + char line0[100]; + char line1[100]; + char line2[100]; + char line3[100]; + char out_line[100]; + char html_line[500]; + int i, k; 
+ + if( argc < 3) { + printf("\nUsage: mongo_compare file1 file2 res_file\n\n"); + printf("\t<file1> should contain reiserfs or ext2 results of mogo benchmark\n"); + printf("\t<file2> should contain reiserfs or ext2 results of mogo benchmark\n"); + printf("\tMongo results will be compared\n"); + printf("\t<res_file.txt> will be contain results in the text form\n"); + printf("\t<res_file.html> will be contain results in the html form\n"); + exit(0); + } + + strcpy(out1,argv[3]); + strcat(out1,".txt"); + + strcpy(out2,argv[3]); + strcat(out2,".html"); + + if( (f1=fopen(argv[1],"r")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", argv[0], argv[1] ); + return 1; + } + + if( (f2=fopen(argv[2],"r")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", argv[0], argv[2] ); + return 1; + } + + if( (f3=fopen(out1,"wr")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", argv[0], out1 ); + return 1; + } + + if( (f4=fopen(out2,"wr")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", argv[0], out2 ); + return 1; + } + + write_html_head(f4); + i=0; + while( fgets(line1, 100, f1) ){ + fgets(line2, 100, f2); + + if ( p=strstr(line1,"\n")) *(p+1)=0; + if ( p=strstr(line2,"\n")) *(p+1)=0; + + strcpy(line3,line1); + line3[strlen(line3)-1]=0; + + while ( strlen(line3) < 40 ){ + strcat(line3," "); + } + + if (strstr(line3,"MONGO_")){ + fprintf(f4,"</table>\n<table BORDER NOSAVE >\n"); + fprintf(f4,"<tr BGCOLOR=\"#CCFFFF\" NOSAVE>"); + fprintf(f4,"<td NOSAVE>\n"); + i=0; + } + if(i<20) strcpy(tmp_str[i],line2); + + if (strstr(line3,"FSYS=")) { + fprintf(f4, "</td><td>\n"); + for (k=0; k<i; k++){ + fprintf(f4, "<tt>%s</tt><br>\n", tmp_str[k]); + } + fprintf (f4, "</td>\n <tr BGCOLOR=\"#CCFFFF\" NOSAVE><td COLSPAN=\"2\"><tt><B> %s %s </B></tt>\n", line3, line2); + i=20; + } + else if ( NULL == strstr(line3, " :" )) { + + if (strstr(line3, "(time") ) fprintf(f4,"<br><tt><center>%s</center></tt>\n",line3); + else { + k=0; p=line3; + while ( *p++ != 0) { + if( (*p != ' ') && (*p != 
'\n') ) k++; + } + if (k > 0) { + fprintf(f4, "<tt>%s</tt><br>\n", line3); + if (i<20) i++; + } + } + } + + else if (strstr(line3,"Create")) fprintf (f4, "</td>\n"); + + line2[strlen(line2)-1]=0; + while ( strlen(line2) < 40 ){ + strcat(line2," "); + } + + strcat(line3,line2); + + + strcpy(out_line, line3); + strcat(out_line, "\n"); + name_str1[0]=0; + + if ( p1=strstr(line1, " :" )) { + strcpy(time_str1, p1+2); + strncpy(name_str1, line1, p1-line1); + + if ( p2=strstr(line2, " :" )) { + strcpy(time_str2, p2+2); + + time_str1[strlen(time_str1)-1]=0; + time_str2[strlen(time_str2)-1]=0; + + sscanf(time_str1,"%f", &n1); + sscanf(time_str2,"%f", &n2); + + ratio = n1/n2; + sprintf(out_line, "%s : %6.2f / %6.2f = %.2f\n", + name_str1, n1, n2, ratio); + + fprintf(f4, "<tr><td><tt> %s   </tt></td> <td><div align=right><tt> %6.2f / %6.2f = %.2f   </tt></div></td></tr>\n", + name_str1, n1, n2, ratio); + + } + } + + fprintf (f3, "%s", out_line); + line1[0] = 0; + line2[0] = 0; + line3[0] = 0; + out_line[0] = 0; + } + + write_html_end(f4); + + fclose (f1); + fclose (f2); + + fclose (f3); + fclose (f4); + + fflush (f3); + fflush (f4); +} +/*******************************************/ +void write_html_head(FILE * fp) +{ + fprintf (fp, "%s", head_str); +} +/*******************************************/ +void write_html_end(FILE * fp) +{ + fprintf (fp, "%s", end_str); +} + diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/mongo_read.c v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo_read.c --- linux/fs/reiserfs/utils/benchmarks/mongo_read.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/mongo_read.c Fri Sep 15 13:34:06 2000 @@ -0,0 +1,48 @@ +/* + * MONGO READ - simple possible program to read a file - suitable for benchmarking FS read performance + */ + +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <malloc.h> +#include <unistd.h> + +int main (int 
argc, char ** argv) +{ + int fd, rd; + char * buf; + int bufsize=4096; + + if (argc < 2) { + printf ("\nUsage: %s filename [buffer_size]\n\n", argv[0]); + return 0; + } + if (argc == 3) + bufsize = atoi(argv[2]); + buf = (char *)malloc (bufsize); + if (buf == 0) { + printf ("Malloc failed on %d\n", bufsize); + return 0; + } + + fd = open (argv[1], O_RDONLY); + if (fd == -1) { + printf ("Open failed (%s)\n", strerror (errno)); + return 0; + } + + while ((rd = read (fd, buf, bufsize)) == bufsize) + ; + + free (buf); + if (rd == -1) { + printf ("Read failed (%s)\n", strerror (errno)); + return 0; + } + + return 0; +} diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/reiser_fract_tree.c v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/reiser_fract_tree.c --- linux/fs/reiserfs/utils/benchmarks/reiser_fract_tree.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/reiser_fract_tree.c Fri Sep 15 04:03:45 2000 @@ -0,0 +1,517 @@ + /* Copyright 1999, 2000 by Hans Reiser, licensing governed by linux/fs/reiserfs/README */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +char tdir[256]; +char path[256]; + +int stats=1; + +void print_usage() +{ +printf(" +This program creates files in a tree of random depth and +branching. Files vary in size randomly according to a distribution +function which seems to model real file systems. This distribution +function has a median size of median_file_size (Median file size is +hypothesized to be proportional to the average per file space +wastage. Notice how that implies that with a more efficient file +system file size usage patterns will in the long term move to a lower +median file size), and a maximum size of max_file_size. 
Directories +vary in size according to the same distribution function but with +separate parameters to control median and maximum size for the number +of files within them, and the number of subdirectories within them. +This program prunes some empty subdirectories in a way that causes +parents of leaf directories to branch less than +median_dir_branching. + + To avoid having one large file distort the results such that you have +to benchmark many times set max_file_size to not more than +bytes_to_consume/10. If maximum/median is a small integer, then +randomness is very poor. This is a bug, Nikita, please find some +clever way to fix it. If it is 0, then the program crashes.... + +For isolating performance consequences of design variations on +particular file or directory size ranges, try setting their median size and +max_size to both equal the max size of the file size range you want +to test. + +To avoid having one large file distort the results set max_file_size to +not more than bytes_to_consume/10. 
Using a distribution function for +the sizes of writes would be a natural next step in developing this program.\n\n" +); + +printf("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n"); +} +/* #define DEBUG */ + + /* buffer from which we write */ +char * write_buffer; +int write_buffer_size = 0; /* gets reset to an argv */ + /* keep out of disk space errors from being endless by tracking whether we already printed the message */ +static int already_whined = 0; + + /* create files until their total + number of bytes exceeds this number, + but not by more than 1/10th */ +long bytes_to_consume = 0; + /* bytes created so far */ +long byte_total = 0; + + /* statistics on sizes of files we attempted to create */ +int fsz_0_100 =0; +int fsz_100_1k =0; +int fsz_1k_10k =0; +int fsz_10k_100k =0; +int fsz_100k_1m =0; +int fsz_1m_10m =0; +int fsz_10m_larger =0; + +void chngdir(char *name) +{ + int i; + + if ( name[0] == '.' && name[1] == '.') { + for (i=strlen(path); i>0; i--) { + if (path[i] == '/') { + path[i]=0; + break; + } + } + }else { + strcat(path,"/"); + strcat(path,name); + } + +} + + /* this is the core statistical + distribution function, and it is + used for file sizes, directory + sizes, etc. */ +int determine_size(double median_size, + double max_size /* The maximal value of size */ ) +{ + /* when x is half of its random range + (max_size/median_size), result is + median_size */ + int nr_random, granularity_reducer; + double size, double_nr_random; + + /* it is a feature for us that this + repeats identically every time it + is run, as otherwise meaningless + variances would affect our results + and require us to use a higher + number of benchmarks to achieve low + noise results. 
*/ + nr_random = rand(); + median_size++; /* avoids divide by zero errors */ + + /* this code does poorly when max_size + is not a lot more than median size, + and that needs fixing */ + + /* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */ + + + /* keep x below the value that when + multiplied by median size on the + next line will equal max_size */ + /* the granularity_reducer is to + handle the case where max_size is + near median_size, since '%' can + only take ints, we need this + complicated what of handling that + for small values of + max_size/median_size by making + large ints out of small ints + temporarily. */ + if (max_size/median_size < 1024) + granularity_reducer = 1024 * 1024; + else + granularity_reducer = 1; + nr_random = nr_random % ((int) (granularity_reducer * (((double) max_size)/((double) median_size)))); + double_nr_random = ((double) nr_random)/(granularity_reducer); + size = median_size * (1/(1 - (double_nr_random)/(((double)max_size)/((double) median_size))) -1); + return ((int) size); +} + + + /* generate a unique filename */ +void get_name_by_number(long this_files_number, char * str) +{ + sprintf (str, "%lu", this_files_number); +} + + /* make a file of a specified size */ +void make_file(int size) +{ + char string [128] = {0}; + char * str = string; + char fname[256]; + int fd = 0; + int error; + static long this_files_number = 1; + + /* collect statistics about the size + of files created, or more + precisely, the size of files that + we will attempt to create. 
*/ + if (size <= 100) fsz_0_100++; + else if (size <= 1000) fsz_100_1k++; + else if (size <= 10*1000) fsz_1k_10k++; + else if (size <= 100*1000) fsz_10k_100k++; + else if (size <= 1000*1000) fsz_100k_1m++; + else if (size <= 10*1000*1000) fsz_1m_10m++; + else fsz_10m_larger++; + + /* construct a name for the file */ + get_name_by_number(this_files_number++, str); + strcpy(fname, path); + strcat(fname, "/"); + strcat(fname, str); + + /* open the file, and deal with the + various errors that can occur */ + while ((fd = open(fname, O_CREAT|O_EXCL|O_RDWR, 0777)) == -1 ) + { + if (errno == ENOSPC) + { + if (!already_whined) { + printf("reiser-2021: out of disk space, "); + already_whined = 1; + } + } + /* it is sometimes useful to be able + to run this program more than once + inside the same directory, and + that means skipping over filenames + that already exist. Thus we + ignore EEXIST, and pay attention + to all else. */ + if ( errno != EEXIST && errno != ENOSPC) { + printf("reiser-2017: open() errno is %d, ", errno); + exit(errno); + } + get_name_by_number(this_files_number++, str); + } + /* write to the file until it is the + right size, handling the various + error conditions appropriately */ + + while(size>0) + { + size -= (error = write(fd, write_buffer, (size < write_buffer_size - 1) ? 
size : (write_buffer_size - 1))); + if (errno == ENOSPC) + { + if (!already_whined) { + printf("reiser-2022: out of disk space, will keep trying\n"); +/* fflush(STDOUT); */ + already_whined = 1; + } + } + if (error == -1 && errno != EAGAIN && errno != ENOSPC) { + printf("reiser-2020: errno is %d, ", errno); + exit(errno); + } + } + + /* close the file */ + if (close(fd)) + { + printf("reiser-2019: close() errno is %d, ", errno); + exit(errno); + } +} + + /* print the statistics on how many + files were created of what size */ + +void print_stats() +{ + if (!stats) return; + + printf("\n"); + printf("File stats: Units are decimal (1k = 1000)\n"); + printf("files 0-100 : %i\n",fsz_0_100); + printf("files 100-1K : %i\n",fsz_100_1k); + printf("files 1K-10K : %i\n",fsz_1k_10k); + printf("files 10K-100K : %i\n",fsz_10k_100k); + printf("files 100K-1M : %i\n",fsz_100k_1m); + printf("files 1M-10M : %i\n",fsz_1m_10m); + printf("files 10M-larger : %i\n",fsz_10m_larger); + printf("total bytes written : %lu\n",byte_total); + +} + /* predict the number of files that + will be created before max_bytes + total length of files is reached */ +long determine_nr_of_files(int median_file_size, double max_file_size, long bytes_to_consume) +{ + long nr_of_files = 0, byte_total = 0; + + /* the next line is not necessary as 1 + is the default, it is just cautious + coding */ + srand(1); + while (byte_total < bytes_to_consume ) + { + byte_total += determine_size(median_file_size, max_file_size); + nr_of_files++; + } + /* reset the random number generator + so that when we determine_size() of + the files later they will be + created with the same "random" + sequence used in this calculation */ + srand(1); +#ifdef DEBUG + printf("number of files is %d\n", (int) nr_of_files); +#endif /* DEBUG */ + fflush(NULL); + return nr_of_files; +} + + /* fill the current working directory + with nr_files_this_directory number of files*/ + +void fill_this_directory(long nr_files_this_directory, long 
median_file_size, long maximum_size) +{ + long size; + +#ifdef DEBUG + printf("filling with %lu files, ", nr_files_this_directory); +#endif + while (nr_files_this_directory--) + { + size = determine_size(median_file_size, maximum_size); + make_file(size); + } +} + + + /* this will unfortunately handle out of disk space by forever trying */ +void make_directory(char * dirname) +{ + static long this_directory_number = 0; + + sprintf(dirname, "d%lu", this_directory_number++); + strcpy(tdir, path); + strcat(tdir, "/"); + strcat(tdir, dirname); + + while (mkdir(tdir, 0x755) == -1 ) + { + if (errno == ENOSPC) + { + if (!already_whined) { + printf("reiser-2021: out of disk space, "); + already_whined = 1; + } + } + /* it is sometimes useful to be able + to run this program more than once + inside the same directory, and + that means skipping over filenames + that already exist. Thus we + ignore EEXIST, and pay attention + to all else. */ + if ( errno != EEXIST && errno != ENOSPC) + printf("reiser-2017: open() errno is %d, ", errno); + sprintf(dirname, "d%lu", this_directory_number++); + strcpy(tdir, path); + strcat(tdir, "/"); + strcat(tdir, dirname); + } +} + + /* assumes we are already chdir'd into + a directory that the subtree is + rooted at. 
Fills the directory + with files and subdirectories, cd's + into those subdirectories, and + recurses upon itself */ + +void do_subtree( + /* the start and end of the portion of the directory + sizes array which corresponds to the sizes of the + directories composing this subtree */ + /* sizes_end minus sizes_start is + equal to the number of directories + in this subtree */ + long * sizes_start, long * sizes_end, + + long median_file_size, long maximum_file_size, long median_dir_branching, long max_dir_branching + ) +{ + long * p; + long * sub_start; + long * sub_end; + int index_subdirectory_to_add_directory_to; + long * dirs_in_subtrees; + char * subtree_name; + long * sizes_index = sizes_start; + char subtree_name_array[128]; + long this_directory_branching; + static long this_directorys_number; + + subtree_name = subtree_name_array; + /* fill this directory with its number of files */ + fill_this_directory(*sizes_index, median_file_size, maximum_file_size); + sizes_index++; + /* ok, now randomly assign directories + (and their number of files) among + the subdirectories that will be + created if at least one directory + is assigned to it */ + + /* this will cause the random number + sequence to not match the one used + in determine_nr_files() I need to + accumulate my values in an array + beforehand. I'll code that later. 
*/ + /* worry about whether 0 or 1 is a + problem value */ + this_directory_branching = determine_size(median_dir_branching, max_dir_branching) + 1; + + /* create an array holding the number + of directories assigned to each + potential subdirectory */ + dirs_in_subtrees = calloc(this_directory_branching, sizeof(long)); + while (sizes_index <= sizes_end) + { + index_subdirectory_to_add_directory_to=(rand() % this_directory_branching); + (*(dirs_in_subtrees+ index_subdirectory_to_add_directory_to))++; + sizes_index++; + } + /* the +1 is for the fill_directory() we did above */ + sizes_index = sizes_start + 1; + + /* go through each potential + subdirectory, and if at least one + directory has been assigned to it, + create it and recurse */ + for (p= dirs_in_subtrees; p < (dirs_in_subtrees + this_directory_branching);p++) + { + if (*p) { + sprintf(subtree_name, "d%lu", this_directorys_number++); + make_directory(subtree_name); + chngdir (subtree_name); + sub_start = sizes_index; + /* the minus one is because *p is the number of elements and arrays start at 0 */ + sub_end = (sizes_index + (*p - 1)); + +#ifdef DEBUG + /* comment this back in if the array logic has you going cross-eyed */ + /* printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */ +#endif + do_subtree(sub_start, sub_end, median_file_size, maximum_file_size, median_dir_branching, max_dir_branching); + chngdir(".."); + } + sizes_index += *p; + } +} + + /* We have already determined that + nr_files can fit in bytes_to_consume + space. Fill the sizes array with + the number of files to be in each + directory, and then call do_subtree + to fill the tree with files and + directories. 
*/ + +void make_fractal_tree (long median_file_size, long maximum_file_size, long median_dir_nr_files, long max_dir_nr_files, long median_dir_branching, long max_dir_branching, long nr_files) +{ + long * sizes_start; + long * sizes_end; + long * sizes_index; + long remaining_files = nr_files; + + /* collect together array of directory + sizes for whole filesystem. This + cannot easily be done recursively + without distorting the directory + sizes and making deeper directories + smaller. Send me the code if you + disagree.:-) */ + /* we almost certainly don't need this much space, but so what.... */ + sizes_index = sizes_start = malloc(nr_files * sizeof(long)); + for (;remaining_files > 0;) + { + *sizes_index = determine_size(median_dir_nr_files, max_dir_nr_files); + *sizes_index = (*sizes_index < remaining_files) ? *sizes_index : remaining_files; + +#ifdef DEBUG + printf("*sizes_index == %lu, ", *sizes_index); +#endif + remaining_files -= *sizes_index; + sizes_index++; + } + /* don't decrement below sizes_start + if nr_files is 0 */ + sizes_end = (sizes_index-- > sizes_start) ? sizes_index: sizes_start; + + sizes_index = sizes_start; + srand(1); + do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size, median_dir_branching, max_dir_branching); + +} + +int main(int argc, char * argv[]) +{ + /* initialized from argv[] */ + long median_file_size, + median_dir_branching, + median_dir_nr_files, + max_dir_nr_files, + max_dir_branching, + max_file_size; + long nr_of_files = 0; /* files to be created */ + + if (argc != 11) + { + print_usage(); + exit(1); + } + + write_buffer_size = atoi(argv[8]); + write_buffer = (char *) malloc(write_buffer_size); + memset (write_buffer, 'a', write_buffer_size); + + /* the number of bytes that we desire + this tree to consume. It will + actually consume more, because the + last file will overshoot by a random amount, and + because the directories and + metadata will consume space. 
*/ + bytes_to_consume = atol(argv[1]); + max_file_size = atol(argv[3]); + median_file_size = atol(argv[2]); + /* Figure out how many random files + will fit into bytes_to_consume + bytes. We depend on resetting + rand() to get the same result + later. */ + nr_of_files = determine_nr_of_files(median_file_size, max_file_size, bytes_to_consume); + + strcpy(path, argv[9]); + mkdir(path, 0755); + stats = atol(argv[10]); + median_dir_branching = atol(argv[6]); + max_dir_branching = atol(argv[7]); + median_dir_nr_files = atol(argv[4]); + max_dir_nr_files = atol(argv[5]); + make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files, max_dir_nr_files, median_dir_branching, max_dir_branching, nr_of_files); + print_stats(); + if(stats) printf("\nreiser_fract_tree finished\n"); + + return 0; +} + diff -u -r --new-file linux/fs/reiserfs/utils/benchmarks/run_mongo v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/run_mongo --- linux/fs/reiserfs/utils/benchmarks/run_mongo Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/benchmarks/run_mongo Fri Sep 15 18:52:51 2000 @@ -0,0 +1,33 @@ +#!/bin/bash + +if [ $# -lt 1 ] +then + echo + echo "Usage : run_mogo <device>" + echo + echo "Example :" + echo "# run_mogo /dev/hdx1" + echo + exit +fi + +DEVICE=$1 + +y="Yes" +echo "WARNING : All data will be erased on device=$DEVICE " +echo "Run ? (Yes | no)" +read x + +if [ -z $x ] +then + exit +fi + +if ! 
[ $x = $y ] +then + exit +fi + +./mongo.sh reiserfs $DEVICE /testfs reiserfs 1 +./mongo.sh ext2 $DEVICE /testfs ext2 1 +./mongo_compare ./results/ext2.tbl ./results/reiserfs.tbl ./results/html/ext2_vs_reiserfs diff -u -r --new-file linux/fs/reiserfs/utils/debugreiserfs/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/Makefile --- linux/fs/reiserfs/utils/debugreiserfs/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/Makefile Thu Aug 10 19:09:05 2000 @@ -0,0 +1,53 @@ +VPATH = ../bin +vpath %.c $(REISERFS_KERNEL_SOURCE) $(REISERFS_LIB) + + +# kernel files needed for debugreiserfs/unpack +KERNEL_C = prints.c item_ops.c +KERNEL_OBJ = prints.o item_ops.o + +# files from utils's lib directory needed for debugreiserfs/unpack +LIB_C = misc.c vfs.c reiserfs.c version.c +LIB_OBJ = misc.o vfs.o reiserfs.o version.o + + +DUMP_OBJS = debugreiserfs.o $(LIB_OBJ) $(KERNEL_OBJ) +UNPACK_OBJS = unpack.o $(LIB_OBJ) $(KERNEL_OBJ) + +DUMPFS = $(TMPBINDIR)/debugreiserfs +UNPACK = $(TMPBINDIR)/unpackreiserfs + + +all: $(DUMPFS) +#$(UNPACK) + +.c.o: + $(CC) $(CFLAGS) $< + +$(DUMPFS): $(DUMP_OBJS) + $(CC) -O2 $(LFLAGS) -o $(DUMPFS) $(DUMP_OBJS) + +$(UNPACK): $(UNPACK_OBJS) + $(CC) $(LFLAGS) -o $(UNPACK) $(UNPACK_OBJS) + +clean: + rm -f *.o $(DUMPFS) $(UNPACK) *~ + +dep: + gcc -MM $(IDIRS) *.c > .depend + for i in $(KERNEL_C); do gcc -MM $(IDIRS) $(REISERFS_KERNEL_SOURCE)/$$i >> .depend ; done + for i in $(LIB_C); do gcc -MM $(IDIRS) ../lib/$$i >> .depend ; done + +install: + cp -f $(DUMPFS) $(SBIN) + +uninstall: + rm -f $(SBIN)/debugreiserfs + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + + + + diff -u -r --new-file linux/fs/reiserfs/utils/debugreiserfs/README v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/README --- linux/fs/reiserfs/utils/debugreiserfs/README Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/README Sun May 14 23:15:02 2000 @@ -0,0 +1,14 @@ +This is to be a man page for 
debugreiserfs + +This program exists only to help to solve problem with reiserfsck. + +DEBUGREISERFS +It can be used to dump reiserfs partition out. +Called with -p it will calculate how many bytes have to be transfereed. +If -P specified, debugreiserfs will write the partition metadata out to stdout +which should be caugth with |gzip -c > whatever.gz + +UNPACKREISERFS deals with the file created in the above way. +zcat whatever.gz | unpackreiserfs /dev/wherever you want to reiserfs transferred to. + +5/10/99 diff -u -r --new-file linux/fs/reiserfs/utils/debugreiserfs/debugreiserfs.c v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/debugreiserfs.c --- linux/fs/reiserfs/utils/debugreiserfs/debugreiserfs.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/debugreiserfs.c Thu Aug 10 19:09:05 2000 @@ -0,0 +1,614 @@ +/* + * Copyright 1996-2000 Hans Reiser + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <asm/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/vfs.h> +#include <netinet/in.h> + +#include "misc.h" +#include "vfs.h" +#include "reiserfs_fs.h" +#include "reiserfs.h" + + +#define print_usage_and_exit() die ("Usage: %s [-b block-to-print][-idc] device\n\ +-i Causes to print all items of a leaf\n\ +-d content of directory items\n\ +-c content of direct items\n\ +-m bitmap blocks\n", argv[0]); + + + +struct reiserfs_fsstat { + int nr_internals; + int nr_leaves; + int nr_files; + int nr_directories; + int nr_unformatted; +} g_stat_info; +int g_comp_number = 0; + +/* + * options + */ +int opt_print_regular_file_content = 0;/* -c */ +int opt_print_directory_contents = 0; /* -d */ +int opt_print_leaf_items = 0; /* -i */ +int opt_print_objectid_map = 0; /* -o */ +int opt_print_block_map = 0; /* -m */ +/* when you want print one block specify -b # */ +int opt_block_to_print = -1; +int opt_pack = 0; /* -P will produce output that 
should be |gzip -c > whatever.gz */ + /* -p will calculate number of bytes needed to transfer the partition */ +int opt_print_journal; +int opt_pack_all = 0; + +struct super_block g_sb; + + +int print_mode (void) +{ + int mode = 0; + + if (opt_print_leaf_items == 1) + mode |= PRINT_LEAF_ITEMS; + if (opt_print_directory_contents == 1) + mode |= (PRINT_LEAF_ITEMS | PRINT_DIRECTORY_ITEMS); + if (opt_print_regular_file_content == 1) + mode |= (PRINT_LEAF_ITEMS | PRINT_DIRECT_ITEMS); + return mode; +} + + +void print_disk_tree (int block_nr) +{ + struct buffer_head * bh; + + bh = bread (g_sb.s_dev, block_nr, g_sb.s_blocksize); + if (B_IS_KEYS_LEVEL (bh)) { + int i; + struct disk_child * dc; + + g_stat_info.nr_internals ++; + print_block (bh, print_mode (), -1, -1); + + dc = B_N_CHILD (bh, 0); + for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) + print_disk_tree (dc->dc_block_number); + + } else if (B_IS_ITEMS_LEVEL (bh)) { + g_stat_info.nr_leaves ++; + print_block (bh, print_mode (), -1, -1); + } else { + print_block (bh, print_mode (), -1, -1); + die ("print_disk_tree: bad block type"); + } + brelse (bh); +} + + +void print_one_block (int block) +{ + struct buffer_head * bh; + + if (reiserfs_progs_test_le_bit (block % (g_sb.s_blocksize * 8), + SB_AP_BITMAP (&g_sb)[block / (g_sb.s_blocksize * 8)]->b_data)) + printf ("%d is used in true bitmap\n", block); + else + printf ("%d is free in true bitmap\n", block); + + bh = bread (g_sb.s_dev, block, g_sb.s_blocksize); + if (!not_formatted_node (bh->b_data, g_sb.s_blocksize)) + print_block (bh, PRINT_LEAF_ITEMS | PRINT_DIRECTORY_ITEMS | (opt_print_regular_file_content == 1 ? 
PRINT_DIRECT_ITEMS : 0), -1, -1); + else + printf ("Looks like unformatted\n"); + brelse (bh); + return; +} + + +static char * parse_options (int argc, char * argv []) +{ + int c; + char * tmp; + + while ((c = getopt (argc, argv, "b:icodmMpPaAj")) != EOF) { + switch (c) { + case 'b': /* print a single node */ + opt_block_to_print = strtol (optarg, &tmp, 0); + if (*tmp) + die ("parse_options: bad block size"); + break; + + case 'p': /* calculate number of bytes, that need to be transfered */ + opt_pack = 'c'; break; + case 'P': /* dump a partition */ + opt_pack = 'p'; break; + case 'a': + opt_pack_all = 'c'; break; + case 'A': + opt_pack_all = 'p'; break; + + case 'i': /* print items of a leaf */ + opt_print_leaf_items = 1; break; + + case 'd': /* print directories */ + opt_print_directory_contents = 1; break; + + case 'c': /* print contents of a regular file */ + opt_print_regular_file_content = 1; break; + + case 'o': /* print a objectid map */ + opt_print_objectid_map = 1; break; + + case 'm': /* print a block map */ + opt_print_block_map = 1; break; + case 'M': /* print a block map with details */ + opt_print_block_map = 2; break; + case 'j': + opt_print_journal = 1; break; /* print transactions */ + } + } + if (optind != argc - 1) + /* only one non-option argument is permitted */ + print_usage_and_exit(); + + return argv[optind]; +} + + +/* journal has permanent location (currently) (after first bitmap + block and constant size (JOURNAL_BLOCK_COUNT + 1) */ +int journal_block (struct super_block * s, __u32 block) +{ +/* + if (block > SB_AP_BITMAP (s)[0]->b_blocknr && + block < SB_AP_BITMAP (s)[0]->b_blocknr + JOURNAL_BLOCK_COUNT + 1) + return 1; +*/ + if (block >= reiserfs_get_journal_block (s) && + block <= reiserfs_get_journal_block (s) + le32_to_cpu(s->u.reiserfs_sb.s_rs->s_orig_journal_size) + 1) + return 1; + return 0; +} + + +int data_block (struct super_block * s, __u32 block) +{ + int i; + + if (block == REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 
/* super block, not data block */ + return 0; + + for (i = 0; i < SB_BMAP_NR (s); i ++) + if (block == SB_AP_BITMAP (s)[i]->b_blocknr) + /* bitmap block, not data block */ + return 0; + + if (journal_block (s, block)) + return 0; + + return 1; +} + + +/* this dumps file sustem to stdout as a such way: + 16 bit blocksize + 32 bit blocknumber + 16 bit - record length + the record of given length + .. + + to pack : print_disk_layout -p /dev/xxx | gzip -c > xxx.gz + to unpack : zcat xxx.gz | unpackreiserfs /dev/xxx +*/ + + +static int get_total_block_number (void) +{ + int i, j; + int retval = 0; + + retval = 0; + + if (opt_pack_all) + retval = SB_BLOCK_COUNT (&g_sb); + else { + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) { + for (j = 0; j < g_sb.s_blocksize * 8; j ++) + if (i * g_sb.s_blocksize * 8 + j < SB_BLOCK_COUNT (&g_sb) && + reiserfs_progs_test_le_bit (j, SB_AP_BITMAP (&g_sb)[i]->b_data)) + retval ++; + } + } + return retval; +} + + +int direct_items = 0, direct_item_total_length = 0; +int items = 0; +int unreachable_items = 0; + +/* fill direct items with 0s */ +static void zero_direct_items (char * buf) +{ + int i; + struct item_head * ih; + + if (((struct block_head *)buf)->blk_level != DISK_LEAF_NODE_LEVEL) + return; + + /* leaf node found */ + ih = (struct item_head *)(buf + BLKH_SIZE); + + for (i = 0; i < ((struct block_head *)buf)->blk_nr_item; i ++, ih ++) { + if (is_direct_le_ih (ih)) { + /* FIXME: do not zero symlinks */ + direct_items ++; + direct_item_total_length += ih->ih_item_len; + memset (buf + ih->ih_item_location, 0, ih->ih_item_len); + } + items ++; + // item left by fsck as unreachable + if (unreachable_item (ih)) + unreachable_items ++; + } +} + + + +void pack_partition (struct super_block * s) +{ + int i, j, k; + uint32_t blocknumber32; + uint16_t reclen16, data16; + __u32 done = 0; + char * data; + long long bytes_to_transfer = 0; + struct buffer_head * bh; + int total_block_number; + + + total_block_number = get_total_block_number (); + + 
+ /* write filesystem's block size to stdout as 16 bit number */ + reclen16 = htons (s->s_blocksize); + if (opt_pack == 'p' || opt_pack_all == 'p') + write (1, &reclen16, sizeof (uint16_t)); + bytes_to_transfer = sizeof (uint16_t); + + /* go through blocks which are marked used in cautious bitmap */ + for (i = 0; i < SB_BMAP_NR (s); i ++) { + for (j = 0; j < s->s_blocksize; j ++) { + /* make sure, that we are not out of the device */ + if (i * s->s_blocksize * 8 + j * 8 == SB_BLOCK_COUNT (s)) + goto out_of_bitmap; + + if (i * s->s_blocksize * 8 + j * 8 + 8 > SB_BLOCK_COUNT (s)) + die ("build_the_tree: Out of bitmap"); + + if (opt_pack_all == 0) + if (SB_AP_BITMAP (s)[i]->b_data[j] == 0) { + /* skip busy block if 'a' not specified */ + continue; + } + + /* read 8 blocks at once */ + bh = bread (s->s_dev, i * s->s_blocksize + j, s->s_blocksize * 8); + for (k = 0; k < 8; k ++) { + __u32 block; + + block = i * s->s_blocksize * 8 + j * 8 + k; + + if (opt_pack_all == 0 && (SB_AP_BITMAP (s)[i]->b_data[j] & (1 << k)) == 0) + continue; +#if 0 + if ((SB_AP_BITMAP (s)[i]->b_data[j] & (1 << k)) == 0 || /* k-th block is free */ + block < SB_BUFFER_WITH_SB (s)->b_blocknr) /* is in skipped for drive manager area */ + continue; +#endif + + print_how_far (&done, total_block_number); + + data = bh->b_data + k * s->s_blocksize; + + if (not_formatted_node (data, s->s_blocksize)) { + /* ok, could not find formatted node here. But + this can be commit block, or bitmap which has + to be transferred */ + if (!not_data_block (s, block)) { + /* this is usual unformatted node. 
Transfer + its number only to erase previously existed + formatted nodes on the partition we will + apply transferred metadata to */ + + /* size of following record in network byte order */ + reclen16 = htons (2); + + /* the record record */ + data16 = htons (MAX_HEIGHT + 1);/*?*/ + data = (char *)&data16; + } else { + /* write super block and bitmap block must be transferred as are */ + /* size of record */ + reclen16 = htons (s->s_blocksize); + + /* the record itself */ + data = data; + } + } else { + /* any kind of formatted nodes gets here (super + block, desc block of journal): FIXME: it would + be useful to be able to find commit blocks */ + zero_direct_items (data); + /* FIXME: do other packing */ + /* write size of following record */ + reclen16 = htons (s->s_blocksize); + + /* the record itself */ + data = data; + +#if 0 + if (blkh->blk_level > DISK_LEAF_NODE_LEVEL) { + /* block must look like internal node on the target + partition. But (currently) fsck do not consider internal + nodes, therefore we do not have to transfer contents of + internal nodes */ + + /* size of following record in network byte order */ + reclen16 = htons (2); + + /* the record itself */ + data16 = htons (DISK_LEAF_NODE_LEVEL + 1); + data = (char *)&data16; + } else { + + /* leaf node found */ + ih = (struct item_head *)(blkh + 1); + + /* fill direct items with 0s */ + for (l = 0; l < blkh->blk_nr_item; l ++, ih ++) + if (is_direct_le_ih (ih)) { + direct_items ++; + direct_item_total_length += ih->ih_item_len; + memset ((char *)blkh + ih->ih_item_location, 0, ih->ih_item_len); + } + + /* write size of following record */ + reclen16 = htons (s->s_blocksize); + + /* the record itself */ + data = (char *)blkh; + } +#endif + } + + /*fprintf (stderr, "block %d, reclen %d\n", block, ntohs (reclen16));*/ + + /* write block number */ + blocknumber32 = htonl (block); + bytes_to_transfer += sizeof (uint32_t) + sizeof (uint16_t) + ntohs (reclen16); + if (opt_pack == 'p' || opt_pack_all == 
'p') { + write (1, &blocknumber32, sizeof (uint32_t)); + /* write record len */ + write (1, &reclen16, sizeof (uint16_t)); + /* write the record */ + write (1, data, ntohs (reclen16)); + } + } + + bforget (bh); + } + } + + out_of_bitmap: + fprintf (stderr, "done\n"); + if (opt_pack == 'c' || opt_pack_all == 'c') + fprintf (stderr, "Bytes to transfer %Ld, sequential 0s %d in %d sequeneces (%items (%d unreacable))\n", + bytes_to_transfer, direct_item_total_length, direct_items, items, unreachable_items); + else + fprintf (stderr, "Bytes dumped %Ld, sequential 0s %d in %d sequeneces\n", + bytes_to_transfer, direct_item_total_length, direct_items); + + +} + + + + +/* print all valid transactions and found dec blocks */ +static void print_journal (struct super_block * s) +{ + struct buffer_head * d_bh = 0, * c_bh = 0; + struct reiserfs_journal_desc * desc = 0; + struct reiserfs_journal_commit *commit ; + int end_journal; + int start_journal; + int i, j; + int first_desc_block = 0; + int wrapped = 0; + int valid_transactions = 0; + + start_journal = reiserfs_get_journal_block (s); + end_journal = start_journal + JOURNAL_BLOCK_COUNT; + printf ("Start scanning from %d\n", start_journal); + + for (i = start_journal; i < end_journal; i ++) { + d_bh = bread (s->s_dev, i, s->s_blocksize); + if (is_desc_block (d_bh)) { + int commit_block; + + if (first_desc_block == 0) + /* store where first desc block found */ + first_desc_block = i; + + print_block (d_bh); /* reiserfs_journal_desc structure will be printed */ + desc = bh_desc (d_bh); + + commit_block = d_bh->b_blocknr + le32_to_cpu(desc->j_len) + 1; + if (commit_block >= end_journal) { + printf ("-- wrapped?"); + wrapped = 1; + break; + } + + c_bh = bread (s->s_dev, commit_block, s->s_blocksize); + commit = bh_commit (c_bh); + if (does_desc_match_commit (desc, commit)) { + printf ("commit block %d (trans_id %d, j_len %d) does not match\n", commit_block, + le32_to_cpu(commit->j_trans_id), le32_to_cpu(commit->j_len)); + brelse 
(c_bh) ; + brelse (d_bh); + continue; + } + + valid_transactions ++; + printf ("(commit block %d) - logged blocks (", commit_block); + for (j = 0; j < le32_to_cpu(desc->j_len); j ++) { + if (j < JOURNAL_TRANS_HALF) { + printf (" %d", le32_to_cpu(desc->j_realblock[j])); + } else { + printf (" %d", le32_to_cpu(commit->j_realblock[i - JOURNAL_TRANS_HALF])); + } + } + printf ("\n"); + i += le32_to_cpu(desc->j_len) + 1; + brelse (c_bh); + } + brelse (d_bh); + } + + if (wrapped) { + c_bh = bread (s->s_dev, first_desc_block - 1, s->s_blocksize); + commit = bh_commit (c_bh); + if (does_desc_match_commit (desc, commit)) { + printf ("No! commit block %d (trans_id %d, j_len %d) does not match\n", first_desc_block - 1, + le32_to_cpu(commit->j_trans_id), le32_to_cpu(commit->j_len)); + } else { + printf ("Yes! (commit block %d) - logged blocks (\n", first_desc_block - 1); + for (j = 0; j < le32_to_cpu(desc->j_len); j ++) { + if (j < JOURNAL_TRANS_HALF) { + printf (" %d", le32_to_cpu(desc->j_realblock[j])); + } else { + printf (" %d", le32_to_cpu(commit->j_realblock[i - JOURNAL_TRANS_HALF])); + } + } + printf ("\n"); + } + brelse (c_bh) ; + brelse (d_bh); + } + + printf ("%d valid transactions found\n", valid_transactions); + + { + struct buffer_head * bh; + struct reiserfs_journal_header * j_head; + + bh = bread (s->s_dev, le32_to_cpu(s->u.reiserfs_sb.s_rs->s_journal_block) + le32_to_cpu(s->u.reiserfs_sb.s_rs->s_orig_journal_size), + s->s_blocksize); + j_head = (struct reiserfs_journal_header *)(bh->b_data); + + printf ("#######################\nJournal header:\n" + "j_last_flush_trans_id %u\n" + "j_first_unflushed_offset %u\n" + "j_mount_id %u\n", le32_to_cpu(j_head->j_last_flush_trans_id), le32_to_cpu(j_head->j_first_unflushed_offset), + le32_to_cpu(j_head->j_mount_id)); + brelse (bh); + } +} + + +struct super_operations reiserfs_sops = {0,}; + +int main (int argc, char * argv[]) +{ + char * file_name; + int dev, i; + +#if 1 + if (1) { + /* ???? 
*/ + schedule (); + iput (0); + } +#endif + + fprintf (stderr, "\n<------REISERFS-DEBUG-TOOL-v2, 1999-------->\n"); + file_name = parse_options (argc, argv); + + + dev = open (file_name, O_RDONLY); + if (dev == -1) + die ("debugreiserfs: Can not open device %s: %s\n", file_name, strerror (errno)); + g_sb.s_dev = dev; + + if (uread_super_block (&g_sb)) + die ("debugreiserfs: no reiserfs found on %s", file_name); + if (uread_bitmaps (&g_sb)) + die ("debugreiserfs: read_bitmap failed"); + + if (opt_pack || opt_pack_all) { + pack_partition (&g_sb); + } else { + /* dump file system to stdout */ + if (opt_block_to_print != -1) { + print_one_block (opt_block_to_print); + goto end; + } + + print_block (SB_BUFFER_WITH_SB (&g_sb)); + + if (opt_print_journal) + print_journal (&g_sb); + + if (opt_print_objectid_map == 1) + print_objectid_map (&g_sb); + + if (opt_print_block_map) { + print_bmap (&g_sb, opt_print_block_map == 1 ? 1 : 0); + } + + if (opt_print_regular_file_content || opt_print_directory_contents || + opt_print_leaf_items) { + print_disk_tree (SB_ROOT_BLOCK (&g_sb)); + + /* print the statistic */ + printf ("File system uses %d internal + %d leaves + %d unformatted nodes = %d blocks\n", + g_stat_info.nr_internals, g_stat_info.nr_leaves, g_stat_info.nr_unformatted, + g_stat_info.nr_internals + g_stat_info.nr_leaves + g_stat_info.nr_unformatted); + } + } + + + end: + /* brelse bitmaps */ + if (SB_AP_BITMAP (&g_sb)) { + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) { + brelse (SB_AP_BITMAP (&g_sb)[i]); + } + freemem (SB_AP_BITMAP (&g_sb)); + } + + /* brelse buffer containing super block */ + brelse (SB_BUFFER_WITH_SB (&g_sb)); + + /* 0 means: write all buffers and free memory */ + fsync_dev (0); + + return 0; +} +/* end of main */ + + + diff -u -r --new-file linux/fs/reiserfs/utils/debugreiserfs/unpack.c v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/unpack.c --- linux/fs/reiserfs/utils/debugreiserfs/unpack.c Thu Jan 1 03:00:00 1970 +++ 
v2.4.0-test8/linux/fs/reiserfs/utils/debugreiserfs/unpack.c Sun May 14 23:15:02 2000 @@ -0,0 +1,160 @@ +/* + * Copyright 1999-2000 Hans Reiser + */ +#include <stdio.h> +#include <errno.h> +#include <malloc.h> +#include <string.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <unistd.h> +#include <sys/stat.h> +#include <asm/types.h> + +#include "misc.h" + +#define print_usage_and_exit() die ("\ +Usage: zcat packed_partition.gz | unpack [-f] /dev/dest\n\ +-f do not overwrite unformatted nodes\n", argv[0]); + +int opt_skip_unfms = 0; +int opt_do_not_write = 0; + +int waiting_read (int fd, char * buf, int count) +{ + int rd, done = 0; + + while (count) { + rd = read (fd, buf, count); + if (rd < 1) + return rd; + buf += rd; + count -= rd; + done += rd; + } + return done; +} + + +int main (int argc, char ** argv) +{ + uint16_t blocksize, reclen16; + uint32_t blocknumber32; + int c; + char * buf; + int fd; + int res; + struct stat st; + + if (argc < 2) { + printf ("Usage: gunzip -c | unpack [-s][-n] /dev/dest\n"); + return 0; + } + + while ((c = getopt (argc, argv, "sn")) != EOF) { + switch (c) { + case 's': /* skip writing of unformatted nodes */ + opt_skip_unfms = 1; + break; + case 'n': + opt_do_not_write = 1; + break; + default: + printf ("Usage: gunzip -c | unpack [-s] /dev/dest\n"); + return 0; + } + } + + /* get file system's block size */ + read (0, &blocksize, sizeof (uint16_t)); + blocksize = ntohs (blocksize); + fprintf (stderr, "blocksize = %d\n", blocksize); + + buf = (char *)malloc (blocksize); + if (!buf) { + perror ("malloc failed"); + return 1; + } + + /* we need to skip the below: + reiserfs: read_bitmaps: 0 blocks differ in true and cautious bitmaps + reiserfs: read_bitmaps: 1 blocks differ in true and cautious bitmaps + */ + +/* + read (0, buf, strlen ("reiserfs: read_bitmaps: 0 blocks differ in true and cautious bitmaps\n")); + if (strncmp (buf, "reiserfs", strlen ("reiserfs"))) { + fprintf (stderr, "Bad signature 1\n"); + return 1; + } +*/ + 
/* + read (0, buf, strlen ("reiserfs: read_bitmaps: 1 blocks differ in true and cautious bitmaps\n")); + if (strncmp (buf, "reiserfs", strlen ("reiserfs"))) { + fprintf (stderr, "Bad signature 2\n"); + return 1; + }*/ + + if (is_mounted (argv[optind])) { + /* check forced on clean filesystem, maybe we can rebuild it (if it is mounted read-only). Later. */ + die ("unpack: '%s' contains a mounted file system\n", argv[optind]); + } + + if (stat (argv[optind], &st) == -1) + die ("unpack: stat failed: %s", strerror (errno)); + if (!S_ISBLK (st.st_mode)) + die ("unpck: %s is not a block device", argv[optind]); + + fd = open (argv[optind], O_CREAT | O_RDWR); + if (fd == -1) { + perror ("open failed"); + return 1; + } + + while ((res = waiting_read (0, (char *)&blocknumber32, sizeof (uint32_t))) == sizeof (uint32_t)) { + /* read block number from stdin */ +/* + if (blocknumber32 == 0) { + printf ("exit\n"); + exit (0); + } +*/ + blocknumber32 = ntohl (blocknumber32); + + /* read 16 bit record length */ + if (waiting_read (0, (char *)&reclen16, sizeof (uint16_t)) != sizeof (uint16_t)) { + perror ("read reclen failed"); + return 1; + } + reclen16 = ntohs (reclen16); + + fprintf (stderr, "%d reclen %d\n", blocknumber32, reclen16); + + /* read the record itself */ + if ((res = waiting_read (0, buf, reclen16)) != reclen16) { + fprintf (stderr, "read record failed (%d %d)\n", res, reclen16); + return 1; + } + + + /* the only one requirement to this block: does not look like + leaf node. 
If you unpacked damaged partition already you + might consider using -s to save time */ + if ((opt_skip_unfms && reclen16 == 2) || opt_do_not_write == 1) + continue; + + + /* write to argv[1] */ + if (reiserfs_llseek (fd, (loff_t)blocknumber32 * (loff_t)blocksize, SEEK_SET) == (loff_t)-1) { + perror ("llseek failed"); + return 1; + } + if (write (fd, buf, reclen16) != reclen16) { + perror ("write failed"); + return 1; + } + } + + fprintf (stderr, "done\n"); + return 0; +} diff -u -r --new-file linux/fs/reiserfs/utils/emu/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/emu/Makefile --- linux/fs/reiserfs/utils/emu/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/emu/Makefile Sun May 14 23:15:02 2000 @@ -0,0 +1,39 @@ +VPATH = ../bin +vpath %.c $(REISERFS_KERNEL_SOURCE) $(REISERFS_LIB) + + +# kernel files needed for emu +KERNEL_C = inode.c namei.c stree.c do_balan.c lbalance.c ibalance.c prints.c item_ops.c tail_conversion.c file.c dir.c fix_node.c bitmap.c objectid.c hashes.c super.c buffer2.c +KERNEL_OBJS = inode.o namei.o stree.o do_balan.o lbalance.o ibalance.o prints.o item_ops.o tail_conversion.o file.o dir.o fix_node.o bitmap.o objectid.o hashes.o super.o buffer2.o + +# files from utils's lib directory needed for emu +LIB_C = misc.c vfs.c reiserfs.c version.c +LIB_OBJS = misc.o vfs.o reiserfs.o version.o + + +EMU_OBJS = emu.o $(LIB_OBJS) $(KERNEL_OBJS) + +EMU = $(TMPBINDIR)/emu + +all: $(EMU) + +.c.o: + $(CC) $(CFLAGS) $< + +$(EMU): $(EMU_OBJS) + $(CC) $(LFLAGS) -o $(EMU) $(EMU_OBJS) + +clean: + rm -f *.o $(EMU) *~ + +dep: + gcc -MM $(IDIRS) *.c > .depend + for i in $(KERNEL_C); do gcc -MM $(IDIRS) $(REISERFS_KERNEL_SOURCE)/$$i >> .depend ; done + for i in $(LIB_C); do gcc -MM $(IDIRS) ../lib/$$i >> .depend ; done + + + + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff -u -r --new-file linux/fs/reiserfs/utils/emu/emu.c v2.4.0-test8/linux/fs/reiserfs/utils/emu/emu.c --- linux/fs/reiserfs/utils/emu/emu.c Thu Jan 1 03:00:00 
1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/emu/emu.c Sat Aug 12 01:46:40 2000 @@ -0,0 +1,900 @@ +/* + * Copyright 1999-2000 Hans Reiser + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <dirent.h> +#include <sys/types.h> +#include <asm/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> + + +#include "misc.h" +#include "vfs.h" +#include "reiserfs_fs.h" +#include "reiserfs.h" + + +struct super_block g_sb; + + + +#define print_usage_and_exit() die ("Usage: %s device\n\n", argv[0]) + + + +void reiserfs_prepare_for_journal(struct super_block *p_s_sb, + struct buffer_head *bh, int wait) { +} + +void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, + struct buffer_head *bh) { +} + + +/* de->d_iname contains name */ +static struct inode * get_inode_by_name (struct inode * dir, struct dentry * dentry) +{ + dentry->d_name.len = strlen (dentry->d_iname); + dentry->d_name.name = dentry->d_iname; + + errno = reiserfs_lookup (dir, dentry); + if (errno) { + perror ("lookup failed"); + return 0; + } + + return dentry->d_inode; +} + + +struct inode * g_pwd; + +static void do_create (char * args) +{ + struct dentry de; + + if (sscanf (args, "%s", de.d_iname) != 1) { + reiserfs_warning ("create: usage: create filename\n"); + return; + } + + if (get_inode_by_name (g_pwd, &de) == 0) { + reiserfs_create (g_pwd, &de, 0100644); + iput (de.d_inode); + } else + reiserfs_warning ("create: file %s exists\n", de.d_name.name); + +} + +#include <assert.h> +char longname[4097]; +static void do_crash (void) +{ + int i, j; + int label_len; + + + printf ("1234567890\n"); + for (i = 10; i < 4033; i ++) { + sprintf (longname, "%d-", i); + label_len = strlen (longname); + assert (label_len <= i); + + for (j = label_len; j < i; j ++) + longname[j] = '0' + j % 10; + longname [i] = 0; + + //printf ("%s\n", buf); + do_create (longname); +/* + if ((fd = creat (buf, 0644)) == -1) { + printf ("could not create file with name 
%d bytes long\n%s\n", + i, strerror (errno)); + exit (); + } + close (fd); +*/ + printf ("%d - ok\n", i); + } + + return; + +} + + +static int do_mkdir (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("mkdir: usage: mkdir dirname\n"); + return 1; + } + + if (get_inode_by_name (g_pwd, &de) == 0) { + reiserfs_mkdir (g_pwd, &de, 0100644); + iput (de.d_inode); + return 0; + } + reiserfs_warning ("mkdir: dir %s exists\n", de.d_name.name); + iput (de.d_inode); + return 1; +} + +static void do_rmdir (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("rmdir: usage: rmdir dirname\n"); + return; + } + + if (get_inode_by_name (g_pwd, &de) != 0) { + reiserfs_rmdir (g_pwd, &de); + iput (de.d_inode); + } else + reiserfs_warning ("rmdir: dir %s is not exists\n", de.d_name.name); +} + + +static ssize_t _do_write (struct inode * inode, ssize_t count, loff_t offset, char * buf) +{ + struct buffer_head bh = {0,}; + struct buffer_head * pbh = &bh; + loff_t pos; + ssize_t retval = count; + int nr; + + bh.b_data = getmem (g_sb.s_blocksize); + bh.b_size = g_sb.s_blocksize; + // atomic_set (&(bh.b_count), 1); + + pos = offset; + while (count) { + inode->i_op->get_block (inode, offset / g_sb.s_blocksize, &bh, 1); + nr = bh.b_size - offset % bh.b_size; + if (count < nr) + nr = count; + if (nr != bh.b_size) { + ll_rw_block (READ, 1, &pbh); + wait_on_buffer (&bh); + } + memcpy (bh.b_data + offset % bh.b_size, buf, nr); + mark_buffer_uptodate (&bh, 1); + mark_buffer_dirty (&bh, 1); + bwrite (&bh); + buf += nr; + count -= nr; + offset += nr; + pos += nr; + } + + freemem (bh.b_data); + if (pos > inode->i_size) { + inode->i_size = pos; + mark_inode_dirty (inode); + } + return retval; +} + + +static void do_write (char * args) +{ + int i; + int count; + loff_t offset; + char * buf; + struct dentry de; + struct inode * inode; + struct file file; + + if (sscanf (args, "%255s %Ld %d", 
de.d_iname, &offset, &count) != 3) { + reiserfs_warning ("write: usage: write filename offset count\n"); + return; + } + + buf = (char *)malloc (count); + if (buf == 0) + reiserfs_panic (&g_sb, "do_write: no memory, or function not defined"); + for (i = 0; i < count; i ++) + buf[i] = '0' + i % 10; + + if ((inode = get_inode_by_name (g_pwd, &de)) != 0) { + file.f_error = 0; + file.f_dentry = &de; + file.f_pos = offset; + generic_file_write (&file, buf, count, &file.f_pos); + //_do_write (inode, count, offset, buf); + iput (inode); + } else { + reiserfs_warning ("do_write: file \'%s\' does not exist\n", de.d_iname); + } + +} + +static void do_truncate (char * args) +{ + int size; + struct file file; + struct dentry de; + + if (sscanf (args, "%255s %d", de.d_iname, &size) != 2) { + reiserfs_warning ("usage: truncate filename size\n"); + return; + } + + if (get_inode_by_name (g_pwd, &de)) { + file.f_dentry = &de; + file.f_flags = 0; + /* if regular file */ + file.f_op = de.d_inode->i_fop; + + de.d_inode->i_size = size; + mark_inode_dirty (de.d_inode); + de.d_inode->i_op->truncate (de.d_inode); + iput (de.d_inode); + } +} + +static void do_read (char * args) +{ + int count; + loff_t offset; + struct file file; + char * buf; + struct dentry de; + struct inode * inode; + + if (sscanf (args, "%255s %Ld %d", de.d_iname, &offset, &count) != 3) { + reiserfs_warning ("do_read: usage: read filename offset count\n"); + return; + } + + if ((inode = get_inode_by_name (g_pwd, &de)) != 0) { + file.f_dentry = &de; + file.f_flags = 0; + file.f_pos = 0; + /* if regular file */ + file.f_op = de.d_inode->i_fop; + buf = (char *)malloc (count); + if (buf == 0) + reiserfs_panic (&g_sb, "do_read: no memory, or function not defined"); + memset (buf, 0, count); + + file.f_op->read (&file, buf, count, &offset); + iput (inode); + free (buf); + } +} + + + +static void do_fcopy (char * args) +{ + char * src; + char * dest; + int fd_source; + int rd, bufsize; + loff_t offset; + char * buf; + struct 
dentry de; + struct inode * inode; + + + src = args; + src [strlen (src) - 1] = 0; + dest = strrchr (args, '/') + 1; + if (dest == 0) + die ("/ must be in the name of source"); + + fd_source = open (src, O_RDONLY); + if (fd_source == -1) + die ("fcopy: could not open \"%s\": %s", + src, strerror (errno)); + + bufsize = 1024; + buf = (char *)malloc (bufsize); + if (buf == 0) + reiserfs_panic (&g_sb, "fcopy: no memory, or function not defined"); + + strcpy (de.d_iname, dest); + + if ((inode = get_inode_by_name (g_pwd, &de)) == 0) { + reiserfs_create (g_pwd, &de, 0100644); + inode = de.d_inode; + } else { + reiserfs_warning ("fcopy: file %s exists\n", de.d_name.name); + return; + } + + offset = 0; + while ((rd = read(fd_source, buf, bufsize)) > 0) + offset += _do_write (inode, rd, offset, buf); + + iput (inode); + free (buf); + + close(fd_source); +} + + +struct linux_dirent { + unsigned long d_ino; + unsigned long d_off; + unsigned short d_reclen; + char d_name[1]; +}; + +struct getdents_callback { + struct linux_dirent * current_dir; + struct linux_dirent * previous; + int count; + int error; +}; + +int dir_size; + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP_2(x) (((x)+sizeof(long)-1) & ~(sizeof(long)-1)) + + +static int filldir(void * __buf, const char * name, int namlen, off_t offset, ino_t ino) +{ + struct linux_dirent * dirent; + struct getdents_callback * buf = (struct getdents_callback *) __buf; + int reclen = ROUND_UP_2(12 + namlen + 1); + + buf->error = -EINVAL; + if (reclen > buf->count) + return -EINVAL; + dirent = buf->previous; + if (dirent) + put_user(offset, &dirent->d_off); + + dirent = buf->current_dir; + buf->previous = dirent; + put_user(ino, &dirent->d_ino); + put_user(reclen, &dirent->d_reclen); + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + + ((char *) dirent) += reclen; + buf->current_dir = dirent; + buf->count -= reclen; + + dir_size += DEH_SIZE + namlen; + + return 
0; +} + + +int emu_getdents (struct file * file, void * dirbuf, int dirbuf_size) +{ + struct getdents_callback buf; + struct linux_dirent * lastdirent; + int error; + + buf.current_dir = (struct linux_dirent *) dirbuf; + buf.previous = NULL; + buf.count = dirbuf_size; + buf.error = 0; + file->f_op->readdir (file, &buf, filldir); + error = buf.error; + lastdirent = buf.previous; + if (lastdirent) { + put_user(file->f_pos, &lastdirent->d_off); + error = dirbuf_size - buf.count; + } + + return error; +} + + +void do_readdir (void) +{ + struct dentry de; + struct file file; +/* struct dirent * dirent;*/ + struct linux_dirent * p; + int entry_nr = 0; + int retval, i; + char * dirbuf; + + dir_size = 0; + + de.d_inode = g_pwd; + file.f_dentry = &de; + file.f_pos = 0; + file.f_op = de.d_inode->i_fop; + + dirbuf = malloc (3933); + while ((retval = emu_getdents (&file, dirbuf, 3933)) > 0) { + p = (struct linux_dirent *)dirbuf; + i = 0; + do { + p = (struct linux_dirent *)(dirbuf + i); + printf ("%s\n", p->d_name); + retval -= p->d_reclen; + i += p->d_reclen; + entry_nr ++; + } while (retval > 0); +#if 0 + while (p->d_reclen && (char *)p + p->d_reclen < dirbuf + retval) { +/* printf ("linux/drivers/scsi/%s\n", p->d_name);*/ + printf ("%s\n", p->d_name); + p = (struct linux_dirent *)((char *)p + p->d_reclen); + entry_nr ++; + } +#endif + } + if (retval != 0) + printf ("getdents failed: %s", strerror (retval)); + free (dirbuf); + + printf ("%d entries, size %d\n", entry_nr, dir_size); +} + + +/* iput pwd inode, iget new pwd inode */ +static int do_cd (char * args) +{ + struct inode * dir; + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("do_cd: usage: cd dirname\n"); + return 1; + } + dir = get_inode_by_name (g_pwd, &de); + if (dir != 0 && S_ISDIR (dir->i_mode)) { + iput (g_pwd); + g_pwd = dir; + return 0; + } + reiserfs_warning ("do_cd: no such file or not a directory \"%s\"\n", de.d_iname); + return 1; +} + +char buf1[1024], 
buf2[1024]; + +/* path is in buf1 */ +static int do_path_cd (char * path) +{ + char * p, * slash; + + strcpy (buf2, path); + p = buf2; +/* + while ((slash = strchr (p, '/'))) { + *slash = 0; + if (do_cd (p)) { + printf ("cd: wrong path element: %s\n", p); + return 1; + } + p = slash + 1; + } + if (do_cd (p)) { + } +*/ + while (1) { + slash = strchr (p, '/'); + if (slash) + *slash = 0; + if (do_cd (p)) { + printf ("cd: wrong path element: %s\n", p); + return 1; + } + if (!slash) + break; + p = slash + 1; + } + return 0; +} + + + + + + +static int do_symlink (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s %s", de.d_iname, buf1) != 2) { + reiserfs_warning ("symlink: usage: symlink filename\n"); + return 0; + } + + if (get_inode_by_name (g_pwd, &de) == 0) { + reiserfs_symlink (g_pwd, &de, buf1); + iput (de.d_inode); + } else + reiserfs_warning ("symlink: file %s exists\n", de.d_name.name); + return 0; +} + + +static int do_readlink (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("readlink: usage: readlink filename\n"); + return 0; + } + + if (get_inode_by_name (g_pwd, &de)) { + if (S_ISLNK (de.d_inode->i_mode)) { + de.d_inode->i_op->readlink (&de, buf1, sizeof (buf1)); + buf1[de.d_inode->i_size] = 0; + reiserfs_warning ("The name in symlink: %s\n", buf1); + } else + reiserfs_warning ("readlink: %s is not a symlink\n", de.d_name.name); + iput (de.d_inode); + } else + reiserfs_warning ("readlink: file %s exists\n", de.d_name.name); + + return 0; +} + + + +#include <dirent.h> + +void do_dcopy (char * args) +{ + char name[256], * p; + char command [256]; + DIR * d; + struct dirent * de; + struct stat st; + + if (sscanf (args, "%255s", name) != 1) { + reiserfs_warning ("do_dcopy: usage: dcopy dirname\n"); + return; + } + if ((d = opendir (name)) == NULL || chdir (name) == -1) { + printf ("opendir failed: %s\n", strerror (errno)); + return; + } + + p = strrchr (name, '/'); + p ++; + if (do_mkdir 
(p)) + return; + if (do_cd (p)) + return; + + while ((de = readdir (d)) != NULL) { + if (lstat (de->d_name, &st) == -1) { + printf ("%s\n", strerror (errno)); + return; + } + if (S_ISREG (st.st_mode)) { + printf ("%s/%s\n", name, de->d_name); + sprintf (command, "%s/%s\n", name, de->d_name); + do_fcopy (command); + continue; + } + if (S_ISLNK (st.st_mode)) { + if (readlink (de->d_name, buf1, sizeof (buf1)) == -1) { + printf ("readlink failed: %s\n", strerror (errno)); + continue; + } + buf1[st.st_size] = 0; + printf ("%s/%s->%s\n", name, de->d_name, buf1); + sprintf (command, "%s %s\n", de->d_name, buf1); + do_symlink (command); + continue; + } + } + +} + + + +void do_diff (char * args) +{ + char orig[256]; + int fd, rd1, rd2; + struct file file; + struct dentry de; + + if (sscanf (args, "%80s %255s", de.d_iname, orig) != 2) { + reiserfs_warning ("diff: usage: diff filename sourcefilename\n"); + return; + } + + fd = open (orig, O_RDONLY); + if (fd == -1) { + printf ("%s\n", strerror (errno)); + return; + } + + /* open file on reiserfs */ + if (get_inode_by_name (g_pwd, &de)) { + file.f_dentry = &de; + file.f_flags = 0; + file.f_pos = 0; + /* if regular file */ + file.f_op = de.d_inode->i_fop; + } else { + printf ("No such file or directory\n"); + return; + } + while ((rd1 = read (fd, buf1, 1024)) > 0) { + rd2 = file.f_op->read (&file, buf2, 1024, &file.f_pos); + if (rd1 != rd2) { + printf ("Read error 1\n"); + return; + } + if (memcmp (buf1, buf2, rd1)) { + printf ("Read error 2\n"); + return; + } + } +} + + +int do_delete (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("delete: usage: delete filename\n"); + return 1; + } + if (get_inode_by_name (g_pwd, &de) == 0 || !(S_ISREG (de.d_inode->i_mode) || + S_ISLNK (de.d_inode->i_mode))) { + reiserfs_warning ("delete: file %s does not exist or not a regular or symlink\n", + de.d_name.name); + return 1; + } + reiserfs_unlink (g_pwd, &de); + reiserfs_delete_inode 
(de.d_inode); + return 0; +} + + +void do_for_each_name (void) +{ + struct dentry de; + struct file file; + char * buf; + struct linux_dirent * p; + + de.d_inode = g_pwd; + file.f_dentry = &de; + file.f_pos = 0; + file.f_op = de.d_inode->i_fop; + + buf = (char *)malloc (1024); + while (emu_getdents (&file, buf, 1024) != 0) { + p = (struct linux_dirent *)buf; + while (p->d_reclen && (char *)p + p->d_reclen < (buf + 1024)) { + printf ("Deleting %s.. %s\n", p->d_name, + do_delete (p->d_name) ? "skipped" : "done"); + + p = (struct linux_dirent *)((char *)p + p->d_reclen); + } + } + + free (buf); +} + + +void do_rm_rf (char * args) +{ + struct dentry de; + + if (sscanf (args, "%255s", de.d_iname) != 1) { + reiserfs_warning ("rm_rf: usage: rm_rf dirname\n"); + return; + } + + if (do_cd (de.d_iname)) + return; + do_for_each_name (); +} + + +static void do_cd_root (void) +{ + struct reiserfs_iget4_args args ; + + args.objectid = REISERFS_ROOT_PARENT_OBJECTID; + if (g_pwd) + iput (g_pwd); + g_pwd = iget4 (&g_sb, REISERFS_ROOT_OBJECTID, 0, (void *)(&args)); +} + + +/* args is name of file which contains list of files to be copied and + directories to be created */ +void do_batch (char * args) +{ + FILE * list; + char * path; + char * name; + + args[strlen (args) - 1] = 0; + list = fopen (args, "r"); + if (list == 0) { + printf ("do_batch: fopen failed on \'%s\': %s\n", args, + strerror (errno)); + return; + } + while (fgets (buf1, sizeof (buf1), list) != 0) { + do_cd_root (); + + /* remove ending \n */ + buf1[strlen (buf1) - 1] = 0; + + /* select last name */ + path = buf1; + name = path + strlen (buf1) - 1; + if (*name == '/') + name --; + while (*name != '/' && name != path) + name --; + if (*name == '/') + *name++ = 0; + if (name == path) + path = 0; + + printf ("cd to %s..", path); + if (path && do_path_cd (path)) { + printf ("do_batch: cd failed\n"); + return; + } + printf ("ok, "); + + if (name [strlen (name) - 1] == '/') { + name [strlen (name) - 1] = 0; + printf 
("mkdir %s..", name); + do_mkdir (name); + } else { + printf ("cp %s..", name); + sprintf (buf2, "%s/%s\n", path, name); + do_fcopy (buf2); + } + printf ("done\n"); + } + printf ("Ok\n"); + fclose (list); +} + + + +void do_help (void) +{ + printf (" create filename\n"); + printf (" mkdir dirname\n"); + printf (" rmdir dirname\n"); + printf (" write filename offset count\n"); + printf (" read filename offset count\n"); + printf (" fcopy filename\n"); + printf (" ls\n"); + printf (" cd dirname\n"); + printf (" dcopy dirname\n"); + printf (" diff filename1(created by emu) filename2(original file)\n"); + printf (" delete file\n"); + printf (" truncate filename newsize\n"); + printf (" rm_rf dirname\n"); + printf (" symlink filename pointed-name\n"); + printf (" readlink filename\n"); + printf (" batch filelist\n"); + printf (" quit\n"); +} + + +void release_bitmaps (struct super_block * s) +{ + int i; + + for (i = 0; i < SB_BMAP_NR (s); i ++) { + brelse (SB_AP_BITMAP (s)[i]); + } + + freemem (SB_AP_BITMAP (s)); +} + + +struct dentry d_root; + +int main (int argc, char * argv []) +{ + char cmd[256]; + char * file_name; + int dev; + + printf ("\n<----------- REISERFSv2 EMU ----------->\n"); + + if(argc < 2) + print_usage_and_exit (); + + + file_name = argv[1]; + + /* open_device will die if it could not open device */ + dev = open (file_name, O_RDWR); + if (dev == -1) + reiserfs_panic (0, "emu: can not open '%s': %s", file_name, strerror (errno)); + +/* init_buffer_cache ();*/ + g_sb.s_dev = dev; + uread_super_block (&g_sb); + uread_bitmaps (&g_sb); + + do_cd_root (); + g_sb.s_root = &d_root; + g_sb.s_root->d_inode = g_pwd; + g_sb.u.reiserfs_sb.s_hash_function = keyed_hash;//hash_function (&g_sb); + + + /* check whether device contains mounted tree file system */ + if (is_mounted (file_name)) + reiserfs_warning ("emu: '%s' contains a not mounted file system\n", file_name); + + + + while (1) { + printf ("Enter command: >"); + fgets (cmd, 255, stdin); + + if (strncasecmp 
(cmd, "crash", 5) == 0) + do_crash (); + if (strncasecmp (cmd, "create ", 7) == 0) + do_create (cmd + 7); + else if (strncasecmp (cmd, "delete ", 7) == 0) + do_delete (cmd + 7); + else if (strncasecmp (cmd, "write ", 6) == 0) + do_write (cmd + 6); + else if (strncasecmp (cmd, "truncate ", 8) == 0) + do_truncate (cmd + 8); + else if (strncasecmp (cmd, "read ", 5) == 0) + do_read (cmd + 5); + else if (strncasecmp (cmd, "mkdir ", 6) == 0) + do_mkdir (cmd + 6); + else if (strncasecmp (cmd, "rmdir ", 6) == 0) + do_rmdir (cmd + 6); + else if (strncasecmp (cmd, "dcopy ", 6) == 0) + do_dcopy (cmd + 6); + else if (strncasecmp (cmd, "fcopy ", 6) == 0) + do_fcopy (cmd + 6); + else if (strncasecmp (cmd, "ls", 2) == 0) + do_readdir (); + else if (strncasecmp (cmd, "cd ", 3) == 0) + do_cd (cmd + 3); + else if (strncasecmp (cmd, "diff ", 5) == 0) + do_diff (cmd + 5); + else if (strncasecmp (cmd, "rm_rf ", 6) == 0) + do_rm_rf (cmd + 6); + else if (strncasecmp (cmd, "symlink ", 8) == 0) + do_symlink (cmd + 8); + else if (strncasecmp (cmd, "readlink ", 9) == 0) + do_readlink (cmd + 9); + else if (strncasecmp (cmd, "batch ", 6) == 0) + do_batch (cmd + 6); + else if (strncmp (cmd, "QUIT", strlen ("QUIT")) == 0) + break; + else if (strncmp (cmd, "q", strlen ("q")) == 0) + break; + else { + do_help (); + } + } + sync_inodes (0); + release_bitmaps (&g_sb); + brelse (g_sb.u.reiserfs_sb.s_sbh); + fsync_dev (0); + return 0; +} + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/fsck/Makefile --- linux/fs/reiserfs/utils/fsck/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/Makefile Sun May 14 23:15:03 2000 @@ -0,0 +1,43 @@ +VPATH = ../bin +vpath %.c ../..:../bin + +#OBJS = main.o pass1.o pass2.o semantic.o pass4.o ubitmap.o info.o check.o ufile.o ustree.o fix_node.o do_balan.o lbalance.o ibalance.o teahash3.o uobjectid.o segments.o +OBJS = main.o check_tree.o ubitmap.o check.o + +FSCK = $(TMPBINDIR)/reiserfsck + 
+.c.o: + $(CC) -DREISERFS_FSCK $(CFLAGS) $< + +all: $(FSCK) + + +$(FSCK): $(OBJS) libmisc.a libreiserfs.a + $(CC) $(LFLAGS) -o $(FSCK) $(OBJS) -lmisc -lreiserfs + +clean: + rm -f *.o $(FSCK) *~ + +dep: + gcc -MM $(IDIRS) *.c ../../*.c > .depend + +install: + cp -f $(FSCK) $(SBIN) + if [ -d $(MANDIR) ] ; then cp reiserfsck.8 $(MANDIR) ; gzip -f -9 $(MANDIR)/reiserfsck.8 ; fi + +uninstall: + rm -f $(MANDIR)/reiserfsck.8.gz $(SBIN)/reiserfsck + + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + + + + + + + + + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/check.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/check.c --- linux/fs/reiserfs/utils/fsck/check.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/check.c Sun May 14 23:37:11 2000 @@ -0,0 +1,395 @@ +/* + * Copyright 1996, 1997, 1999 Hans Reiser + */ + +#include "fsck.h" + + +#if 0 +/* this goes through buffers checking delimiting keys + */ + +struct buffer_head * g_left = 0; +struct buffer_head * g_right = 0; +struct key * g_dkey = 0; + + +static void check_directory_item (struct item_head * ih, struct buffer_head * bh) +{ + int i; + struct reiserfs_de_head * deh; + + for (i = 0, deh = B_I_DEH (bh, ih); i < I_ENTRY_COUNT (ih) - 1; i ++) + if (deh[i].deh_offset > deh[i + 1].deh_offset) + die ("check_directory_item: entries are not sorted properly"); +} + + +static void check_items (struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + for (i = 0, ih = B_N_PITEM_HEAD (bh, i); i < B_NR_ITEMS (bh); i ++, ih) { + if (is_direntry_le_ih (ih)) + check_directory_item (ih, bh); + } +} + + +static void compare_neighboring_leaves_in_pass1 (void) +{ + struct key * left = B_N_PKEY (g_left, B_NR_ITEMS (g_left) - 1); + + + if (comp_keys (left, B_N_PKEY (g_right, 0)) != SECOND_GREATER) + die ("compare_neighboring_leaves_in_pass1: left key is greater, that the right one"); + + if (/*comp_keys (B_PRIGHT_DELIM_KEY (g_left), g_dkey) == FIRST_GREATER ||*/ + comp_keys (g_dkey, B_N_PKEY 
(g_right, 0)) != KEYS_IDENTICAL) { + reiserfs_panic (0, "compare_neighboring_leaves_in_pass1: left's rdkey %k, dkey %k, first key in right %k", + B_PRIGHT_DELIM_KEY (g_left), g_dkey, B_N_PKEY (g_right, 0)); + } + + check_items (g_left); + +/*&&&&&&&&&&&&&&&&&&&&&&&&&& + for (i = 0, ih = B_N_PITEM_HEAD (g_left, i); i < B_NR_ITEMS (g_left); i ++, ih ++) + if (is_item_accessed (ih) == YES) + die ("compare_neighboring_leaves_in_pass1: item marked as accessed in g_left"); + for (i = 0, ih = B_N_PITEM_HEAD (g_right, i); i < B_NR_ITEMS (g_right); i ++, ih ++) + if (is_item_accessed (ih) == YES) + die ("compare_neighboring_leaves_in_pass1: item marked as accessed in g_right"); +&&&&&&&&&&&&&&&&&&&&&&&&&&&*/ + +} + + +static void is_there_unaccessed_items (struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, 0); + for (i = 0; i < B_NR_ITEMS (bh); i ++, ih ++) { + /* + if (is_objectid_used (ih->ih_key.k_objectid) == NO) + die ("is_there_unaccessed_items: %lu is not marked as used", ih->ih_key.k_objectid); + */ + + if (is_item_accessed (ih) == 0) { + print_block (bh, 1, -1, -1); + die ("is_there_unaccessed_items: unaccessed item found"); + } + } +} + + +static void compare_neighboring_leaves_after_all (void) +{ + struct key * left = B_N_PKEY (g_left, B_NR_ITEMS (g_left) - 1); + struct key * right = B_N_PKEY (g_right, 0); + + if (comp_keys (left, B_PRIGHT_DELIM_KEY (g_left)) != SECOND_GREATER) + die ("compare_neighboring_leaves_after_all: invalid right delimiting key"); + + if (comp_keys (left, B_N_PKEY (g_right, 0)) != SECOND_GREATER) + die ("compare_neighboring_leaves_after_all: left key is greater, that the right one"); + + if (comp_keys (B_PRIGHT_DELIM_KEY (g_left), g_dkey) != KEYS_IDENTICAL || + comp_keys (g_dkey, B_N_PKEY (g_right, 0)) != KEYS_IDENTICAL) { + reiserfs_panic (0, "compare_neighboring_leaves_after all: invalid delimiting keys from left to right (%k %k %k)", + B_PRIGHT_DELIM_KEY (g_left), g_dkey, B_N_PKEY (g_right, 0)); + 
} + + if (comp_short_keys (left, right) == KEYS_IDENTICAL) { + if (is_direct_le_key (left) || is_indirect_le_key (left)) + if (le_key_k_offset (right) != le_key_k_offset (left) + le_ih_bytes_number (B_N_PITEM_HEAD (g_left, B_NR_ITEMS (g_left) - 1), g_sb.s_blocksize)) + die ("compare_neighboring_leaves_after all: hole between items or items are overlapped"); + } + + is_there_unaccessed_items (g_left); + +} + + +typedef void (check_function_t)(void); + +static void reiserfsck_check_tree (int dev, int block, int size, check_function_t comp_func) +{ + struct buffer_head * bh; + + bh = bread (dev, block, size); + + if (!B_IS_IN_TREE (bh)) { + reiserfs_panic (0, "reiserfsck_check_tree: buffer (%b %z) not in tree", bh, bh); + } + + if (not_formatted_node (bh->b_data, bh->b_size) || !is_block_used (bh->b_blocknr) || + (is_leaf_node (bh->b_data) && is_leaf_bad (bh)) || + (is_internal_node (bh->b_data) && is_internal_bad (bh))) + die ("reiserfsck_check_tree: bad node in the tree"); + if (B_IS_KEYS_LEVEL (bh)) { + int i; + struct disk_child * dc; + + dc = B_N_CHILD (bh, 0); + for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) { + reiserfsck_check_tree (dev, dc->dc_block_number, size, comp_func); + g_dkey = B_N_PDELIM_KEY (bh, i); + } + } else if (B_IS_ITEMS_LEVEL (bh)) { + g_right = bh; + if (g_left != 0 && g_dkey != 0) { + comp_func (); + brelse (g_left); + } + g_left = g_right; + return; + } else { + print_block (bh, 0, -1, -1); + reiserfs_panic (0, "reiserfsck_check_tree: bad block type"); + } + brelse (bh); +} + +static void reiserfsck_check_cached_tree (int dev, int block, int size) +{ + struct buffer_head * bh; + + bh = find_buffer (dev, block, size); + if (bh == 0) + return; + if (!buffer_uptodate (bh)) { + die ("reiserfsck_check_cached_tree: found notuptodate buffer"); + } + bh->b_count ++; + + if (!B_IS_IN_TREE (bh)) { + die ("reiserfsck_check_cached_tree: buffer (%b %z) not in tree", bh, bh); + } + + if (not_formatted_node (bh->b_data, bh->b_size) || !is_block_used 
(bh->b_blocknr) || + (is_leaf_node (bh->b_data) && is_leaf_bad (bh)) || + (is_internal_node (bh->b_data) && is_internal_bad (bh))) + die ("reiserfsck_check_cached_tree: bad node in the tree"); + if (B_IS_KEYS_LEVEL (bh)) { + int i; + struct disk_child * dc; + + dc = B_N_CHILD (bh, 0); + for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) { + reiserfsck_check_cached_tree (dev, dc->dc_block_number, size); + g_dkey = B_N_PDELIM_KEY (bh, i); + } + } else if (B_IS_ITEMS_LEVEL (bh)) { + /* g_right = bh; + if (g_left != 0 && g_dkey != 0) { + comp_func (); + brelse (g_left); + } + g_left = g_right;*/ + brelse (bh); + return; + } else { + print_block (bh, 0, -1, -1); + reiserfs_panic (0, "reiserfsck_check_cached_tree: bad block type"); + } + brelse (bh); +} + + +void reiserfsck_tree_check (check_function_t how_to_compare_neighbors) +{ + g_left = 0; + g_dkey = 0; + reiserfsck_check_tree (g_sb.s_dev, SB_ROOT_BLOCK (&g_sb), g_sb.s_blocksize, how_to_compare_neighbors); + brelse (g_right); +} + + +void reiserfsck_check_pass1 () +{ + if (opt_check == 1) + reiserfsck_tree_check (compare_neighboring_leaves_in_pass1); +} + +void check_cached_tree () +{ + reiserfsck_check_cached_tree (g_sb.s_dev, SB_ROOT_BLOCK (&g_sb), g_sb.s_blocksize); +} + +void reiserfsck_check_after_all () +{ + reiserfsck_tree_check (compare_neighboring_leaves_after_all); +} + + + + + + +int is_internal_node (char * buf) +{ + struct block_head * blkh; + + blkh = (struct block_head *)buf; + if (blkh->blk_level != DISK_LEAF_NODE_LEVEL) + return 1; + return 0; +} + +int is_leaf_node (char * buf) +{ + struct block_head * blkh; + + blkh = (struct block_head *)buf; + if (blkh->blk_level == DISK_LEAF_NODE_LEVEL) + return 1; + return 0; +} + +static int is_bad_sd (struct item_head * ih, char * item) +{ + struct stat_data * sd = (struct stat_data *)item; + + if (!S_ISDIR (sd->sd_mode) && !S_ISREG(sd->sd_mode) && + !S_ISCHR (sd->sd_mode) && !S_ISBLK(sd->sd_mode) && + !S_ISLNK (sd->sd_mode) && !S_ISFIFO(sd->sd_mode) && + 
!S_ISSOCK(sd->sd_mode)) { + if (opt_verbose) + reiserfs_warning ("file %k unexpected mode encountered 0%o\n", &ih->ih_key, sd->sd_mode); + } + return 0; +} + + + +static int is_bad_directory (struct item_head * ih, char * item, int blocksize) +{ + int i; + int namelen; + struct reiserfs_de_head * deh = (struct reiserfs_de_head *)item; + __u32 prev_offset = 0; + __u16 prev_location = 0xffff; + + for (i = 0; i < I_ENTRY_COUNT (ih); i ++) { + namelen = I_DEH_N_ENTRY_FILE_NAME_LENGTH (ih, deh + i, i); + if (namelen > REISERFS_MAX_NAME_LEN (blocksize)) { + return 1; + } + if (deh[i].deh_offset <= prev_offset) { + return 1; + } + prev_offset = deh[i].deh_offset; + + if (deh[i].deh_location >= prev_location) { + return 1; + } + } + + return 0; +} + + +#include <sys/ioctl.h> +#include <sys/mount.h> + + +int blocks_on_device (int dev, int blocksize) +{ +int size; + + if (ioctl (dev, BLKGETSIZE, &size) >= 0) { + return size / (blocksize / 512); + } + if (ioctl (dev, BLKGETSIZE, &size) >= 0) { + return size / (blocksize / 512); + } else { + struct stat stat_buf; + memset(&stat_buf, '\0', sizeof(struct stat)); + if(fstat(dev, &stat_buf) >= 0) { + return stat_buf.st_size / (blocksize / 512); + } else { + die ("can not calculate device size\n"); + } + } + return 0; +} + + +/* change incorrect block adresses by 0. 
Do not consider such item as incorrect */ +static int is_bad_indirect (struct item_head * ih, char * item, int dev, int blocksize) +{ + int i; + int bad = 0; + int blocks; + + if (ih->ih_item_len % UNFM_P_SIZE) { + if (opt_verbose) + reiserfs_warning ("indirect item of %h of invalid length"); + return 1; + } + blocks = blocks_on_device (dev, blocksize); + + for (i = 0; i < I_UNFM_NUM (ih); i ++) { + __u32 * ind = (__u32 *)item; + + if (ind[i] >= blocks) { + bad ++; + ind[i] = 0; + continue; + } + } + return 0; +} + + +int is_bad_item (struct item_head * ih, char * item, int blocksize, int dev) +{ + if (I_IS_STAT_DATA_ITEM (ih)) + return is_bad_sd (ih, item); + + if (I_IS_DIRECTORY_ITEM (ih)) + return is_bad_directory (ih, item, blocksize); + + if (I_IS_INDIRECT_ITEM (ih)) + return is_bad_indirect (ih, item, dev, blocksize); + + return 0; +} + + +/* only directory item can be fatally bad */ +int is_leaf_bad (struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + if (!is_leaf_node (bh->b_data)) + return 0; + for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) + if (is_bad_item (ih, B_I_PITEM (bh, ih), bh->b_size, bh->b_dev)) + return 1; + return 0; +} + +int is_internal_bad (struct buffer_head * bh) +{ + struct key * key; + int i; + + if (!is_internal_node (bh->b_data)) + return 0; + for (i = 0; i < B_NR_ITEMS (bh); i ++) { + key = B_N_PDELIM_KEY (bh, i); + if (key->k_dir_id >= key->k_objectid || + (key->k_uniqueness != 500 && key->k_uniqueness != (__u32)-1 && key->k_uniqueness != (__u32)-2 && + key->k_uniqueness != 0)) + return 1; + } + return 0; + +} + +#endif diff -u -r --new-file linux/fs/reiserfs/utils/fsck/check_tree.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/check_tree.c --- linux/fs/reiserfs/utils/fsck/check_tree.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/check_tree.c Sun May 14 23:15:03 2000 @@ -0,0 +1,689 @@ +/* + * Copyright 1999 Hans Reiser + */ + +#include "fsck.h" +#include "reiserfs.h" + + 
+// +// +// check S+ tree of the file system +// +// check_fs_tree stops and recommends to run fsck --rebuild-tree when: +// 1. read fails +// 2. node of wrong level found in the tree +// 3. something in the tree points to wrong block number +// out of filesystem boundary is pointed by tree +// to block marked as free in bitmap +// the same block is pointed from more than one place +// not data blocks (journal area, super block, bitmaps) +// 4. bad formatted node found +// 5. delimiting keys are incorrect +// + + + +/* to make sure, that no blocks are pointed to from more than one + place we use additional bitmap (control_bitmap). If we see pointer + to a block we set corresponding bit to 1. If it is set already - + run fsck with --rebuild-tree */ +static char ** control_bitmap; +/* will compare with what does super_block say */ +int used_blocks = 0; + + +/* 1 if block is not marked as used in the bitmap */ +static int is_block_free (struct super_block * s, blocknr_t block) +{ + int i, j; + char * bitmap; + + i = block / (s->s_blocksize * 8); + j = block % (s->s_blocksize * 8); + + if (opt_fsck_mode == FSCK_DEFAULT) + bitmap = SB_AP_BITMAP (s)[i]->b_data; + else + bitmap = g_new_bitmap[i]; + return !test_bit (j, bitmap); + +} + + +/* we have seen this block in the tree, mark corresponding bit in the + control bitmap */ +static void we_met_it (struct super_block * s, blocknr_t block) +{ + int i, j; + + used_blocks ++; + i = block / (s->s_blocksize * 8); + j = block % (s->s_blocksize * 8); + return set_bit (j, control_bitmap [i]); +} + + +/* have we seen this block somewhere in the tree before? 
*/ +static int did_we_meet_it (struct super_block * s, blocknr_t block) +{ + int i, j; + + i = block / (s->s_blocksize * 8); + j = block % (s->s_blocksize * 8); + return test_bit (j, control_bitmap [i]); +} + + +static void init_control_bitmap (struct super_block * s) +{ + int i; + + control_bitmap = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) { + control_bitmap[i] = getmem (s->s_blocksize); + memset (control_bitmap[i], 0, s->s_blocksize); + } + + /* skipped and super block */ + for (i = 0; i <= SB_BUFFER_WITH_SB (s)->b_blocknr; i ++) + we_met_it (s, i); + + /* bitmaps */ + for (i = 0; i < SB_BMAP_NR (s); i ++) + we_met_it (s, SB_AP_BITMAP (s)[i]->b_blocknr); + + for (i = 0; i < get_journal_size (s) + 1; i ++) + we_met_it (s, i + get_journal_start (s)); + + + /* unused space of last bitmap is filled by 1s */ + for (i = SB_BMAP_NR (s) * s->s_blocksize * 8; --i >= SB_BLOCK_COUNT (s); ) { + we_met_it (s, i); + used_blocks --; + } +} + + +static void print_bmap_block (int i, char * data, int silent) +{ + int j, k; + int bits = g_sb.s_blocksize * 8; + int zeros = 0, ones = 0; + + printf ("#%d: ", i); + + if (test_bit (0, data)) { + /* first block addressed by this bitmap block is used */ + ones ++; + if (!silent) + printf ("Busy (%d-", i * bits); + for (j = 1; j < bits; j ++) { + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Free(%d-", j - 1 + i * bits, j + i * bits); + + while (!test_bit (j, data)) { + zeros ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end: + } + } else { + /* first block addressed by this bitmap is free */ + zeros ++; + if (!silent) + printf ("Free (%d-", i * bits); + for (j = 1; j < bits; j ++) { + k = 0; + while (!test_bit (j, data)) { + k ++; + if 
(j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + zeros += k; + goto end2; + } + j++; + } + zeros += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + k = 0; + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + ones += k; + goto end2; + } + j++; + } + ones += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end2: + } + } + + printf ("used %d, free %d\n", ones, zeros); +} + + +static void show_diff (int n, char * disk, char * control, int bits) +{ + int i; + int last_diff = 0; + int from, num; + + for (i = 0; i < bits; i ++) { + if (test_bit (i, disk) && !test_bit (i, control)) { + if (last_diff == 1) { + num ++; + continue; + } else if (last_diff == 2) { + printf ("Block [%d-%d] free in disk bitmap, used in control\n", from, from + num - 1); + } + num = 1; + from = n * bits + i; + last_diff = 1; + continue; + } + if (!test_bit (i, disk) && test_bit (i, control)) { + if (last_diff == 2) { + num ++; + continue; + } else if (last_diff == 1) { + printf ("Block [%d-%d] used in disk bitmap, free in control\n", from, from + num - 1); + } + num = 1; + from = n * bits + i; + last_diff = 2; + continue; + } + /* the same bits */ + if (last_diff == 1) + printf ("Block [%d-%d] used in disk bitmap, free in control\n", from, from + num - 1); + if (last_diff == 2) + printf ("Block [%d-%d] free in disk bitmap, used in control\n", from, from + num - 1); + + num = 0; + from = 0; + last_diff = 0; + continue; + } +} + +static void compare_bitmaps (struct super_block * s) +{ + int i, wrong_bitmap = 0; + char * bitmap; + + printf ("Comparing bitmaps.."); + + if (SB_FREE_BLOCKS (s) != SB_BLOCK_COUNT (s) - used_blocks) { + printf ("\nUsed blocks %d, super block version %d", + used_blocks, SB_BLOCK_COUNT (s) - SB_FREE_BLOCKS (s)); + wrong_bitmap = 1; + } + + for (i = 0; i < SB_BMAP_NR (s); i ++) { + if (opt_fsck_mode == FSCK_DEFAULT) + /* 
we are read-only checking the partition, check this + bitmap */ + bitmap = SB_AP_BITMAP(s)[i]->b_data; + else + /* we are re-building the tree, bitmap for check is here */ + bitmap = g_new_bitmap [i]; + + if (memcmp (bitmap, control_bitmap[i], s->s_blocksize)) { + printf ("\nbitmap %d does not match to the correct one", i); + if (opt_verbose) { + printf ("\nSee diff"); + show_diff (i, bitmap, control_bitmap[i], s->s_blocksize * 8); + } + wrong_bitmap = 1; + } + } + if (wrong_bitmap) + reiserfs_panic (s, "\nRun reiserfsck with --rebuild-tree (or rewrite correct bitmap)\n"); + + printf ("ok\n"); +} + + + + + + +/* is this block legal to be pointed to by some place of the tree? */ +static int bad_block_number (struct super_block * s, blocknr_t block) +{ + if (block >= SB_BLOCK_COUNT (s)) { + reiserfs_warning ("block out of filesystem boundary found\n"); + return 1; + } + + if (not_data_block (s, block)) { + reiserfs_warning ("not data block is used in the tree\n"); + return 1; + } + + if (is_block_free (s, block)) { + reiserfs_warning ("block %lu is not marked as used in the disk bitmap\n", + block); + return 1; + } + + if (did_we_meet_it (s, block)) { + reiserfs_warning ("block %lu is in tree already\n", block); + return 1; + } + + we_met_it (s, block); + return 0; +} + + +/* 1 if some of fields in the block head of bh look bad */ +static int bad_block_head (struct buffer_head * bh) +{ + struct block_head * blkh; + + blkh = B_BLK_HEAD (bh); + if (__le16_to_cpu (blkh->blk_nr_item) > (bh->b_size - BLKH_SIZE) / IH_SIZE) { + reiserfs_warning ("block %lu has wrong blk_nr_items (%z)\n", + bh->b_blocknr, bh); + return 1; + } + if (__le16_to_cpu (blkh->blk_free_space) > + bh->b_size - BLKH_SIZE - IH_SIZE * __le16_to_cpu (blkh->blk_nr_item)) { + reiserfs_warning ("block %lu has wrong blk_free_space %z\n", + bh->b_blocknr, bh); + return 1; + } + return 0; +} + + +/* 1 if it does not look like reasonable stat data */ +static int bad_stat_data (struct buffer_head * bh, struct 
item_head * ih) +{ + return 0; +} + + +/* it looks like we can check item length only */ +static int bad_direct_item (struct buffer_head * bh, struct item_head * ih) +{ + return 0; +} + + +/* each unformatted node pointer*/ +static int bad_indirect_item (struct super_block * s, struct buffer_head * bh, + struct item_head * ih) +{ + int i; + __u32 * ind = (__u32 *)B_I_PITEM (bh, ih); + + if (__le16_to_cpu (ih->ih_item_len) % 4) + return 1; + for (i = 0; i < I_UNFM_NUM (ih); i ++) { + /* check unformatted node pointer and mark it used in the + control bitmap */ + if (ind[i] && bad_block_number (s, __le32_to_cpu (ind[i]))) + return 1; + } + /* delete this check for 3.6 */ + if (ih->u.ih_free_space > s->s_blocksize - 1) + reiserfs_warning ("%h has wrong wong ih_free_space\n"); + return 0; +} + + +/* check entry count and locations of all names */ +static int bad_directory_item (struct buffer_head * bh, struct item_head * ih) +{ + int i; + struct reiserfs_de_head * deh; + + + if (I_ENTRY_COUNT (ih) > __le16_to_cpu (ih->ih_item_len) / (DEH_SIZE + 1)) + return 1; + + deh = B_I_DEH (bh, ih); + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + if (__le16_to_cpu (deh->deh_location) >= __le16_to_cpu (ih->ih_item_len)) + return 1; + if (i && __le16_to_cpu (deh->deh_location) >= __le16_to_cpu ((deh-1)->deh_location)) + return 1; + if ((ih->ih_key.k_objectid != REISERFS_ROOT_OBJECTID && deh_dir_id (deh) == 0) || + deh_offset (deh) == 0 || deh_objectid (deh) == 0 || + deh_dir_id (deh) == deh_objectid (deh)) + return 1; + } + return 0; +} + + +static int bad_item (struct super_block * s, struct buffer_head * bh, int i) +{ + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, i); + + if (I_IS_STAT_DATA_ITEM (ih)) + return bad_stat_data (bh, ih); + + if (I_IS_DIRECT_ITEM (ih)) + return bad_direct_item (bh, ih); + + if (I_IS_INDIRECT_ITEM (ih)) + return bad_indirect_item (s, bh, ih); + + return bad_directory_item (bh, ih); +} + + +/* 1 if i-th and (i-1)-th items can not be 
neighbors */ +static int bad_pair (struct super_block * s, struct buffer_head * bh, int i) +{ + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, i); + + if (comp_keys (&((ih - 1)->ih_key), &ih->ih_key) != -1) + return 1; + + if (I_IS_STAT_DATA_ITEM (ih)) + /* left item must be of another object */ + if (comp_short_keys (&((ih - 1)->ih_key), &ih->ih_key) != -1) + return 1; + + if (I_IS_DIRECT_ITEM (ih)) { + /* left item must be indirect or stat data item of the same + file */ + if (comp_short_keys (&((ih - 1)->ih_key), &ih->ih_key) != 0) + return 1; + if (!((I_IS_STAT_DATA_ITEM (ih - 1) && ih->ih_key.k_offset == 1) || + (I_IS_INDIRECT_ITEM (ih - 1) && + (ih - 1)->ih_key.k_offset + I_BYTES_NUMBER (ih - 1, s->s_blocksize) == + ih->ih_key.k_offset))) + return 1; + } + + if (I_IS_INDIRECT_ITEM (ih) || I_IS_DIRECTORY_ITEM (ih)) { + /* left item must be stat data of the same object */ + if (comp_short_keys (&((ih - 1)->ih_key), &ih->ih_key) != 0) + return 1; + if (!I_IS_STAT_DATA_ITEM (ih - 1)) + return 1; + } + + return 0; +} + + +/* 1 if block head or any of items is bad */ +static int bad_leaf (struct super_block * s, struct buffer_head * bh) +{ + int i; + + if (bad_block_head (bh)) + return 1; + + for (i = 0; i < B_NR_ITEMS (bh); i ++) { + if (bad_item (s, bh, i)) { + reiserfs_warning ("block %lu has invalid item %d: %h\n", + bh->b_blocknr, i, B_N_PITEM_HEAD (bh, i)); + return 1; + } + + if (i && bad_pair (s, bh, i)) { + reiserfs_warning ("block %lu has wrong order of items\n", + bh->b_blocknr); + return 1; + } + } + return 0; +} + + +/* 1 if bh does not look like internal node */ +static int bad_internal (struct super_block * s, struct buffer_head * bh) +{ + return 0; +} + + +/* bh must be formatted node. 
blk_level must be tree_height - h + 1 */ +static int bad_node (struct super_block * s, struct buffer_head * bh, + int level) +{ + if (B_LEVEL (bh) != level) { + reiserfs_warning ("node with wrong level found in the tree\n"); + return 1; + } + + if (bad_block_number (s, bh->b_blocknr)) + return 1; + + if (B_IS_ITEMS_LEVEL (bh)) + return bad_leaf (s, bh); + + return bad_internal (s, bh); +} + + +/* internal node bh must point to block */ +static int get_pos (struct buffer_head * bh, blocknr_t block) +{ + int i; + + for (i = 0; i <= B_NR_ITEMS (bh); i ++) { + if (B_N_CHILD (bh, i)->dc_block_number == block) + return i; + } + die ("get_pos: position for block %lu not found", block); + return 0; +} + + +/* path[h] - leaf node */ +static struct key * lkey (struct buffer_head ** path, int h) +{ + int pos; + + while (h > 0) { + pos = get_pos (path[h - 1], path[h]->b_blocknr); + if (pos) + return B_N_PDELIM_KEY(path[h - 1], pos - 1); + h --; + } + return 0; +} + + +/* path[h] - leaf node */ +static struct key * rkey (struct buffer_head ** path, int h) +{ + int pos; + + while (h > 0) { + pos = get_pos (path[h - 1], path[h]->b_blocknr); + if (pos != B_NR_ITEMS (path[h - 1])) + return B_N_PDELIM_KEY (path[h - 1], pos); + h --; + } + return 0; +} + + +/* are all delimiting keys correct */ +static int bad_path (struct buffer_head ** path) +{ + int h; + struct key * dk; + + h = -1; + while (path[h]) + h ++; + + dk = lkey (path, h); + if (dk && comp_keys (dk, B_N_PKEY (path[h], 0))) + return 1; + dk = rkey (path, h); + if (dk && comp_keys (dk, B_PRIGHT_DELIM_KEY (path[h]))) + return 1; + + return 0; +} + + +static inline blocknr_t first_child (struct buffer_head * bh) +{ + return B_N_CHILD (bh, 0)->dc_block_number; +} + + +static inline blocknr_t last_child (struct buffer_head * bh) +{ + return B_N_CHILD (bh, B_NR_ITEMS (bh))->dc_block_number; +} + + +static inline blocknr_t next_child (struct buffer_head * child, + struct buffer_head * parent) +{ + int i; + + for (i = 0; i < 
B_NR_ITEMS (parent); i ++) { + if (B_N_CHILD (parent, i)->dc_block_number == child->b_blocknr) + return B_N_CHILD (parent, i + 1)->dc_block_number; + } + die ("next_child: no child found: should not happen"); + return 0; +} + + +/* h == 0 for root level. block head's level == 1 for leaf level */ +static inline int h_to_level (struct super_block * s, int h) +{ + return SB_TREE_HEIGHT (s) - h - 1; +} + + +static inline int leaf_level (struct buffer_head * bh) +{ + return B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL; +} + + +static void print (int cur, int total) +{ + printf ("/%3d (of %3d)", cur, total);fflush (stdout); +} + + +/* erase /XXX(of XXX) */ +static void erase (void) +{ + printf ("\b\b\b\b\b\b\b\b\b\b\b\b\b"); + printf (" "); + printf ("\b\b\b\b\b\b\b\b\b\b\b\b\b"); + fflush (stdout); +} + + +/* pass the S+ tree of filesystem */ +void check_fs_tree (struct super_block * s) +{ + struct buffer_head * path[MAX_HEIGHT] = {0,}; + int total[MAX_HEIGHT] = {0,}; + int cur[MAX_HEIGHT] = {0,}; + int h = 0; + blocknr_t block = SB_ROOT_BLOCK (s); + + uread_bitmaps (s); + + init_control_bitmap (s); + + printf ("Checking S+tree.."); + + while ( 1 ) { + if (path[h]) + die ("check_fs_tree: empty slot expected"); + + if (h) + print (cur[h - 1], total[h - 1]); + + path[h] = bread (s->s_dev, block, s->s_blocksize); + if (path[h] == 0 || bad_node (s, path[h], h_to_level (s, h))) + reiserfs_panic (s, "Run reiserfsck with --rebuild-tree\n"); + + if (leaf_level (path[h])) { + if (bad_path (path)) + reiserfs_panic (s, "Run reiserfsck with --rebuild-tree\n"); + + brelse (path[h]); + if (h) + erase (); + + while (h && path[h]->b_blocknr == last_child (path[h - 1])) { + path[h] = 0; + h --; +/* check_internal (path[h]);*/ + brelse (path[h]); + if (h) + erase (); + } + + if (h == 0) { + path[h] = 0; + break; + } + + cur[h - 1] ++; + block = next_child (path[h], path[h-1]); + path[h] = 0; + continue; + } + total[h] = B_NR_ITEMS (path[h]) + 1; + cur[h] = 1; + block = first_child (path[h]); + h 
++; + } + + /* S+ tree is correct (including all objects have correct + sequences of items) */ + printf ("ok\n"); + + /* compare created bitmap with the original */ + compare_bitmaps (s); + +} + + + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/info.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/info.c --- linux/fs/reiserfs/utils/fsck/info.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/info.c Tue May 11 16:17:29 1999 @@ -0,0 +1,122 @@ + +/* + * Copyright 1996-1999 Hans Reiser + */ +#include "fsck.h" + +struct fsck_stat g_fsck_info = {0, }; + + +void add_event (int event) +{ + switch (event) { + /* tree building (pass 1 and 2) info */ + case GOOD_LEAVES: + g_fsck_info.fs_good_leaves ++; break; + case UNINSERTABLE_LEAVES: + g_fsck_info.fs_uninsertable_leaves ++; break; + case REWRITTEN_FILES: + g_fsck_info.fs_rewritten_files ++; break; + case LEAVES_USED_BY_INDIRECT_ITEMS: + g_fsck_info.fs_leaves_used_by_indirect_items ++; break; + case UNFM_OVERWRITING_UNFM: + g_fsck_info.fs_unfm_overwriting_unfm ++; break; + case INDIRECT_TO_DIRECT: + g_fsck_info.fs_indirect_to_direct ++; break; + + /* pass 3 info (semantic) */ + case FIXED_SIZE_DIRECTORIES: + g_fsck_info.fs_fixed_size_directories ++; break; + case INCORRECT_REGULAR_FILES: + /* file has incorrect sequence of items (incorrect items are truncated) */ + g_fsck_info.fs_incorrect_regular_files ++; break; + case FIXED_SIZE_FILES: + g_fsck_info.fs_fixed_size_files ++; break; + + /* pass 4 info */ + case UNACCESSED_ITEMS: + g_fsck_info.fs_unaccessed_items ++; break; + case FIXED_RIGHT_DELIM_KEY: + g_fsck_info.fs_fixed_right_delim_key ++; break; + + /* file system info */ + case STAT_DATA_ITEMS: + g_fsck_info.fs_stat_data_items ++; break; + case REGULAR_FILES: + g_fsck_info.fs_regular_files ++; break; + case DIRECTORIES: + g_fsck_info.fs_directories ++; break; + case SYMLINKS: + g_fsck_info.fs_symlinks ++; break; + case OTHERS: + g_fsck_info.fs_others ++; break; + } +} + + +int get_event (int 
event) +{ + switch (event) { + case GOOD_LEAVES: + return g_fsck_info.fs_good_leaves; + case UNINSERTABLE_LEAVES: + return g_fsck_info.fs_uninsertable_leaves; + case REGULAR_FILES: + return g_fsck_info.fs_regular_files; + case INCORRECT_REGULAR_FILES: + return g_fsck_info.fs_incorrect_regular_files; + case DIRECTORIES: + return g_fsck_info.fs_directories; + case FIXED_SIZE_DIRECTORIES: + return g_fsck_info.fs_fixed_size_directories; + case STAT_DATA_ITEMS: + return g_fsck_info.fs_stat_data_items; + } + return 0; +} + +/* outputs information about inconsistencies */ +void output_information () +{ + FILE * fp; + char buf[160]; + + if (opt_verbose == 0) + return; + + fp = stderr; + +/* time (&t); + fputs ("**** This is reiserfsck log file: created ", fp); fputs (ctime (&t), fp); fputs ("\n", fp);*/ + fputs ("Building S+ tree info\n", fp); + sprintf (buf, "\tGood leaves: %d\n", g_fsck_info.fs_good_leaves); fputs (buf, fp); + sprintf (buf, "\tBad leaves: %d\n", g_fsck_info.fs_uninsertable_leaves); fputs (buf, fp); + sprintf (buf, "\tRewritten files: %d\n", g_fsck_info.fs_rewritten_files); fputs (buf, fp); + sprintf (buf, "\tLeaves pointed by indirect item: %d\n", g_fsck_info.fs_leaves_used_by_indirect_items); fputs (buf, fp); + sprintf (buf, "\tUnformatted nodes overwritten by direct items\nand then by other unformatted node: %d\n", + g_fsck_info.fs_unfm_overwriting_unfm); fputs (buf, fp); + sprintf (buf, "\tIndirect_to_direct conversions: %d\n", g_fsck_info.fs_indirect_to_direct); fputs (buf, fp); + + fputs ("Semantic pass info\n", fp); + sprintf (buf, "\tFiles with fixed size: %d\n", g_fsck_info.fs_fixed_size_files); fputs (buf, fp); + sprintf (buf, "\tDirectories with fixed size: %d\n", g_fsck_info.fs_fixed_size_directories); fputs (buf, fp); + sprintf (buf, "\tEntries pointing to nowhere (deleted): %d\n", g_fsck_info.fs_deleted_entries); fputs (buf, fp); + + fputs ("Pass 4 info\n", fp); + sprintf (buf, "\tUnaccessed items found (and deleted): %d\n", 
g_fsck_info.fs_unaccessed_items); fputs (buf, fp); + sprintf (buf, "\tFixed right delimiting keys: %d\n", g_fsck_info.fs_fixed_right_delim_key); fputs (buf, fp); + sprintf (buf, "\tStat datas: %d\n", g_fsck_info.fs_stat_data_items); fputs (buf, fp); + + + fputs ("File system info\n", fp); + sprintf (buf, "\tFiles found: %d\n", g_fsck_info.fs_regular_files); fputs (buf, fp); + sprintf (buf, "\tDirectories found: %d\n", g_fsck_info.fs_directories); fputs (buf, fp); + sprintf (buf, "\tSymlinks found: %d\n", g_fsck_info.fs_symlinks); fputs (buf, fp); + sprintf (buf, "\tOthers: %d\n", g_fsck_info.fs_others); fputs (buf, fp); + + /*fclose (fp);*/ +} + + + + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/journal.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/journal.c --- linux/fs/reiserfs/utils/fsck/journal.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/journal.c Sun May 14 23:15:03 2000 @@ -0,0 +1,536 @@ +/* + * Copyright 2000 Hans Reiser + */ + +#include "fsck.h" +#include <limits.h> +#include "reiserfs.h" + + +/* compares description block with commit block. 
returns 1 if they differ, 0 if they are the same */ +static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, + struct reiserfs_journal_commit *commit) { + if (commit->j_trans_id != desc->j_trans_id || commit->j_len != desc->j_len || commit->j_len > JOURNAL_TRANS_MAX || + commit->j_len <= 0 + ) { + return 1 ; + } + return 0 ; +} + + +// +// set up start journal block and journal size +// make journal unreplayable by kernel replay routine +// +void reset_journal (struct super_block * s) +{ + int i ; + struct buffer_head *bh ; + int done = 0; + int len; + int start; + + /* first block of journal */ + s->u.reiserfs_sb.s_rs->s_journal_block = get_journal_start (s); + start = s->u.reiserfs_sb.s_rs->s_journal_block; + + /* journal size */ + s->u.reiserfs_sb.s_rs->s_orig_journal_size = get_journal_size (s); + len = s->u.reiserfs_sb.s_rs->s_orig_journal_size + 1; + + printf ("Resetting journal - "); fflush (stdout); + + for (i = 0 ; i < len ; i++) { + print_how_far (&done, len); + bh = getblk (s->s_dev, start + i, s->s_blocksize) ; + memset(bh->b_data, 0, s->s_blocksize) ; + mark_buffer_dirty(bh,0) ; + mark_buffer_uptodate(bh,0) ; + bwrite (bh); + brelse(bh) ; + } + printf ("\n"); fflush (stdout); + +#if 0 /* need better way to make journal unreplayable */ + + + /* have journal_read to replay nothing: look for first non-desc + block and set j_first_unflushed_offset to it */ + { + int offset; + struct buffer_head * bh, *jh_bh; + struct reiserfs_journal_header * j_head; + struct reiserfs_journal_desc * desc; + + + jh_bh = bread (s->s_dev, s->u.reiserfs_sb.s_rs->s_journal_block + s->u.reiserfs_sb.s_rs->s_orig_journal_size, + s->s_blocksize); + j_head = (struct reiserfs_journal_header *)(jh_bh->b_data); + + for (offset = 0; offset < s->u.reiserfs_sb.s_rs->s_orig_journal_size; offset ++) { + bh = bread (s->s_dev, s->u.reiserfs_sb.s_rs->s_journal_block + offset, s->s_blocksize); + desc = (struct reiserfs_journal_desc 
*)((bh)->b_data); + if (memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) { + /* not desc block found */ + j_head->j_first_unflushed_offset = offset; + brelse (bh); + break; + } + brelse (bh); + } + + mark_buffer_uptodate (jh_bh, 1); + mark_buffer_dirty (jh_bh, 1); + bwrite (jh_bh); + brelse (jh_bh); + } +#endif +} + +// +// end of stolen from ./fs/reiserfs/journal.c +// + + +#define bh_desc(bh) ((struct reiserfs_journal_desc *)((bh)->b_data)) +#define bh_commit(bh) ((struct reiserfs_journal_commit *)((bh)->b_data)) + + + + + +static int desc_block (struct buffer_head * bh) +{ + struct reiserfs_journal_desc * desc = (struct reiserfs_journal_desc *)bh->b_data; + if (!memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) + return 1; + return 0; +} + +static int next_expected_desc (struct super_block * s, struct buffer_head * d_bh) +{ + int offset; + struct reiserfs_journal_desc * desc; + + desc = (struct reiserfs_journal_desc *)d_bh->b_data; + offset = d_bh->b_blocknr - get_journal_start (s); + return get_journal_start (s) + ((offset + desc->j_len + 1 + 1) % JOURNAL_BLOCK_COUNT); +} + + +static int is_valid_transaction (struct super_block * s, struct buffer_head * d_bh) +{ + struct buffer_head * c_bh; + int offset; + struct reiserfs_journal_desc *desc = (struct reiserfs_journal_desc *)d_bh->b_data; + struct reiserfs_journal_commit *commit ; + + + offset = d_bh->b_blocknr - get_journal_start (s); + + /* ok, we have a journal description block, lets see if the transaction was valid */ + c_bh = bread (s->s_dev, next_expected_desc (s, d_bh) - 1, + s->s_blocksize) ; + + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + if (journal_compare_desc_commit (s, desc, commit)) { +/* printf ("desc and commit block do not match\n");*/ + brelse (c_bh) ; + return 0; + } + brelse (c_bh); + return 1; +} + + +int next_desc (struct super_block * s, int this) +{ + int j; + struct buffer_head * bh; + int retval; + + j = this + 1; + do { + bh = bread (s->s_dev, (j % JOURNAL_BLOCK_COUNT), 
s->s_blocksize); + if (!desc_block (bh)) { + j ++; + brelse (bh); + continue; + } +/* printf ("desc block found %lu, trans_id %ld, len %ld\n", + bh->b_blocknr, bh_desc(bh)->j_trans_id, bh_desc(bh)->j_len);*/ + retval = (j % JOURNAL_BLOCK_COUNT); + brelse (bh); + break; + } while (1); + + return retval; +} + + +void replay_all (struct super_block * s) +{ + int first_journal_block = get_journal_start (s); + int journal_size = get_journal_size (s); + struct buffer_head * d_bh, * c_bh; + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + int i; + int the_most_old_transaction = INT_MAX; + int the_most_young_transaction = 0; + int valid_transactions = 0; + int last_replayed; + int start_replay = 0; + + + /* look for oldest valid transaction */ + printf ("Looking for the oldest transaction to start with %4d", valid_transactions); + for (i = first_journal_block; i < first_journal_block + journal_size; i ++) { + d_bh = bread (s->s_dev, i, s->s_blocksize); + if (desc_block (d_bh)) { + desc = (struct reiserfs_journal_desc *)d_bh->b_data; + /*printf ("block %ld is desc block of the transaction (trans_id %ld, len %ld, mount_id %ld) - ", + d_bh->b_blocknr, desc->j_trans_id, desc->j_len, desc->j_mount_id);*/ + if (!is_valid_transaction (s, d_bh)) { + i += desc->j_len + 1; + brelse (d_bh); + continue; + } + valid_transactions ++; + printf ("\b\b\b\b \b\b\b\b%4d", valid_transactions); fflush (stdout); + + /*printf ("good\n");*/ + if (the_most_old_transaction > desc->j_trans_id) { + the_most_old_transaction = desc->j_trans_id; + start_replay = d_bh->b_blocknr; + } + if (the_most_young_transaction < desc->j_trans_id) { + the_most_young_transaction = desc->j_trans_id; + start_replay = d_bh->b_blocknr; + } + i += desc->j_len + 1; + } + brelse (d_bh); + continue; + } + + printf ("\b\b\b\b \b\b\b\bok\n" + "%d valid trans found. 
Will replay from %d to %d\n", valid_transactions, + the_most_old_transaction, the_most_young_transaction); + + + printf ("Replaying transaction..%4d left..\b\b\b\b\b\b\b", valid_transactions); + + /* replay all valid transaction */ + last_replayed = 0; + + while (1) { + d_bh = bread (s->s_dev, start_replay, s->s_blocksize); + if (!desc_block (d_bh)) { +/* printf ("No desc block found at the expected place %lu\n", d_bh->b_blocknr);*/ + brelse (d_bh); + start_replay = next_desc (s, start_replay); + continue; + } + + desc = bh_desc (d_bh); + + if (!is_valid_transaction (s, d_bh)) { +/* printf ("skip invalid transaction %ld (length %ld) starting from %lu\n", desc->j_trans_id, desc->j_len, d_bh->b_blocknr);*/ + brelse (d_bh); + start_replay = next_desc (s, start_replay); + continue; + } + + if (desc->j_trans_id < last_replayed) { + /* we found transaction that has been replayed already */ + brelse (d_bh); +/* printf ("Found transaction %ld. last replayed %d\n", desc->j_trans_id, last_replayed);*/ + break; + } +/* printf ("Replay transaction %ld (length %ld)-", desc->j_trans_id, desc->j_len);*/ + + + /* replay transaction */ + { + int trans_offset = d_bh->b_blocknr - get_journal_start (s); + struct buffer_head * log_bh, * in_place; + + + c_bh = bread (s->s_dev, get_journal_start (s) + ((trans_offset + desc->j_len + 1) % JOURNAL_BLOCK_COUNT), + s->s_blocksize) ; + + desc = bh_desc (d_bh); + commit = bh_commit (c_bh); + if (journal_compare_desc_commit(s, desc, commit)) + die ("read_journal: invalid transaction"); + + for (i = 0; i < desc->j_len; i ++) { + /* read from log record */ + log_bh = bread (s->s_dev, get_journal_start (s) + (trans_offset + 1 + i) % JOURNAL_BLOCK_COUNT, + s->s_blocksize); + if (log_bh->b_blocknr == 8199) + printf ("block 8199 put in-placen\n"); + /* write in-place */ + if (i < JOURNAL_TRANS_HALF) { + in_place = getblk(s->s_dev, desc->j_realblock[i], s->s_blocksize) ; + } else { + in_place = getblk(s->s_dev, commit->j_realblock[i - 
JOURNAL_TRANS_HALF], s->s_blocksize) ; + } + if (log_bh->b_blocknr == 8199) { + printf ("Put 8199 to %lu\n", in_place->b_blocknr); + } + memcpy (in_place->b_data, log_bh->b_data, s->s_blocksize); + mark_buffer_dirty (in_place, 0); + mark_buffer_uptodate (in_place, 1); + bwrite (in_place); + brelse (in_place); + brelse (log_bh); + } + brelse (c_bh); + } + valid_transactions --; + printf ("\b\b\b\b \b\b\b\b%4d", valid_transactions); fflush (stdout); + last_replayed = desc->j_trans_id; + start_replay = next_expected_desc (s, d_bh); + brelse (d_bh); + } + printf (" left .. ok\n"); +} + + +// +// these duplicate the same from fsck/check_tree.c +// +static inline blocknr_t first_child (struct buffer_head * bh) +{ + return B_N_CHILD (bh, 0)->dc_block_number; +} + + +static inline blocknr_t last_child (struct buffer_head * bh) +{ + return B_N_CHILD (bh, B_NR_ITEMS (bh))->dc_block_number; +} + + +static inline blocknr_t next_child (struct buffer_head * child, + struct buffer_head * parent) +{ + int i; + + for (i = 0; i < B_NR_ITEMS (parent); i ++) { + if (B_N_CHILD (parent, i)->dc_block_number == child->b_blocknr) + return B_N_CHILD (parent, i + 1)->dc_block_number; + } + die ("next_child: no child found: should not happen"); + return 0; +} + + +/* h == 0 for root level. 
block head's level == 1 for leaf level */ +static inline int h_to_level (struct super_block * s, int h) +{ + return SB_TREE_HEIGHT (s) - h - 1; +} + + +static inline int leaf_level (struct buffer_head * bh) +{ + return B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL; +} + + +static void print (int cur, int total) +{ + printf ("/%3d (of %3d)", cur, total);fflush (stdout); +} + + +/* erase /XXX(of XXX) */ +static void erase (void) +{ + printf ("\b\b\b\b\b\b\b\b\b\b\b\b\b"); + printf (" "); + printf ("\b\b\b\b\b\b\b\b\b\b\b\b\b"); + fflush (stdout); +} + + +/* the simplest scanning for free block., This should be rare */ +__u32 alloc_block (void) +{ + int i, j; + int bits = g_sb.s_blocksize * 8; + int start = get_journal_start (&g_sb) + get_journal_size (&g_sb) + 1; + + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) { + j = find_next_zero_bit (g_new_bitmap[i], bits, start); + if (j < bits) { + mark_block_used (j + i * bits); + return j + i * bits; + } + start = 0; + } + die ("allocate_block: no free blocks"); + return 0; + +} + +struct buffer_head * copy_contents (struct buffer_head * from) +{ + struct buffer_head * bh; + __u32 new; + + new = alloc_block (); + bh = getblk (from->b_dev, new, from->b_size); + memcpy (bh->b_data, from->b_data, bh->b_size); + mark_buffer_uptodate (bh, 1); + mark_buffer_dirty (bh, 1); + bwrite (bh); + return bh; +} + + +static void update_pointer (struct buffer_head * parent, __u32 new, __u32 old) +{ + int i; + + for (i = 0; i <= B_NR_ITEMS (parent); i ++) { + if (B_N_CHILD (parent, i)->dc_block_number == old) { + B_N_CHILD (parent, i)->dc_block_number = new; + mark_buffer_dirty (parent, 1); + return; + } + } + die ("update_pointer: old pointer not found"); +} + + +static int block_from_journal (struct super_block * s, __u32 block) +{ + if(block && block < get_journal_start (s)) { + printf ("not data block (%d) got into tree. 
Should not appear, but fixable\n", block); + return 0; + } + if (block >= get_journal_start (s) && block <= get_journal_start (s) + get_journal_size (s)) + /* <= must_journal_end due to journal header */ + return 1; + return 0; +} + + +/* sometimes indirect items point to blocks from journal. Replace them + with data blocks. I believe this is rare case */ +static void correct_indirect_items (struct super_block * s, struct buffer_head * bh) +{ + int i, j; + struct item_head * ih; + __u32 * unfm; + + ih = B_N_PITEM_HEAD (bh, 0); + for (i = 0; i < B_NR_ITEMS (bh); i ++, ih ++) { + if (!I_IS_INDIRECT_ITEM (ih)) + continue; + unfm = (__u32 *)B_I_PITEM (bh, ih); + for (j = 0; j < I_UNFM_NUM (ih); j++) { + if (block_from_journal (s, unfm[j])) { + struct buffer_head * from, * to; + + from = bread (bh->b_dev, unfm[j], bh->b_size); + to = copy_contents (from); + unfm[j] = to->b_blocknr; + mark_buffer_dirty (bh, 1); + brelse (from); + brelse (to); + } + } + } +} + + + +/* sometimes, (hopefully very rare) we have to use journal blocks to + complete tree building. In this case we have to find all those + blocks and replace them with data blocks (Those must exist to this + time. We have to look such blocks also when start of */ +void release_journal_blocks (struct super_block * s) +{ + struct buffer_head * path[MAX_HEIGHT] = {0,}; + int total[MAX_HEIGHT] = {0,}; + int cur[MAX_HEIGHT] = {0,}; + int h = 0; + + + blocknr_t block = SB_ROOT_BLOCK (s); + + printf ("%d blocks from journal area [%d %d] has been used to perform repairing. Will release them. 
This may take a while\nScanning tree..", + from_journal, get_journal_start (s), + get_journal_start (s) + get_journal_size (s)); + + + while ( 1 ) { + if (path[h]) + die ("release_journal_blocks: empty slot expected"); + + if (h) + print (cur[h - 1], total[h - 1]); + + path[h] = bread (s->s_dev, block, s->s_blocksize); + if (path[h] == 0) + die ("release_journal_blocks: bread failed"); + + if (block_from_journal (s, path[h]->b_blocknr)) { + /* copy block to newly allocated, adjust pointer in the + parent, replace on the path */ + struct buffer_head * bh; + __u32 old = path[h]->b_blocknr; + + bh = copy_contents (path[h]); + brelse (path[h]); + path[h] = bh; + if (h) { + /* adjust corresponding dc_child_num in the parent*/ + update_pointer (path[h - 1], bh->b_blocknr, old); + } else { + /* change pointer from super block */ + SB_ROOT_BLOCK (s) = bh->b_blocknr; + } + } + + if (leaf_level (path[h])) { + /* correct unformatted node pointers if they point to the + journal area */ + correct_indirect_items (s, path[h]); + + brelse (path[h]); + if (h) + erase (); + + while (h && path[h]->b_blocknr == last_child (path[h - 1])) { + path[h] = 0; + h --; + brelse (path[h]); + if (h) + erase (); + } + + if (h == 0) { + path[h] = 0; + break; + } + + cur[h - 1] ++; + block = next_child (path[h], path[h-1]); + path[h] = 0; + continue; + } + total[h] = B_NR_ITEMS (path[h]) + 1; + cur[h] = 1; + block = first_child (path[h]); + h ++; + } + + printf ("ok\n"); +} diff -u -r --new-file linux/fs/reiserfs/utils/fsck/main.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/main.c --- linux/fs/reiserfs/utils/fsck/main.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/main.c Sun May 14 23:37:11 2000 @@ -0,0 +1,650 @@ +/* + * Copyright 1996-2000 Hans Reiser + */ +#define __USE_GNU +#include <stdio.h> + +#include "fsck.h" +#include "reiserfs.h" + +#include <getopt.h> +#include <sys/mount.h> + + +// Being called without optional parameters fsck will read-only check +// 
the filesystem. Only this works so far. + + + +#define print_usage_and_exit() die ("Usage: %s [-aprvy] [--rebuild-tree]\n"\ +"[--scan-whole-partition] [--no-journal-replay]\n"\ +"[--replay-whole-journal] device\n"\ +"\n"\ +"Long options:\n"\ +"\tModes:\n"\ +"\t--check consistency checking (default)\n"\ +"\t--rebuild-tree force fsck to rebuild filesystem from scratch\n"\ +"\t (takes a long time)\n"\ +"\t--search-key will search for key you specify\n"\ +"\n"\ +"\tArea to scan:\n"\ +"\t--scan-used-part-only scan what is marked used in bitmap (default)\n"\ +"\t--scan-whole-partition scan whole partition\n"\ +"\n"\ +"\tJournal replay options:\n"\ +"\t--replay-by-mount replay by calling mount -o replay-only (default)\n"\ +"\t--no-journal-replay skip journal replaying\n"\ +"\t--replay-whole-journal replay all valid transaction found in the journal\n"\ +"\n"\ +"\tStop point specifying\n"\ +"\t--do-not-stop (default)\n"\ +"\t--stop-after-replay\n"\ +"\t--stop-after-pass1\n"\ +"\t--stop-after-pass2\n"\ +"\t--stop-after-semantic-pass\n"\ +"Short options:\n"\ +"\t-v verbose mode\n"\ +"\t-a supress progress information\n"\ +"\t-y\n"\ +"\t-p do nothing, exist for compatibility with fsck(8)\n"\ +"\t-r\n", argv[0]); + + +int opt_verbose = 0; +int opt_fsck = 0; /* called with -a by fsck - the front-end for the + various file system checkers */ + + +// +// fsck has three modes: default one - is check, other two are rebuild +// and find items +// +int opt_fsck_mode = FSCK_DEFAULT; + +/* in mode FSCK_FIND_ITEM key for search is stored here */ +struct key key_to_find; + +// +// replay journal modes +// +#define REPLAY_DEFAULT 0 +#define REPLAY_ALL 1 +#define NO_REPLAY 2 +int opt_journal_replay = REPLAY_DEFAULT; + + +// +// fsck may stop after any of its phases: after journal replay or +// after any of passes. 
Default is do not stop +// +int opt_stop_point = STOP_DEFAULT; + + +// +// +// +int opt_what_to_scan = SCAN_USED_PART; + + +// +// +// +int opt_lost_found = NO_LOST_FOUND; + + + +/* fsck is called with one non-optional argument - file name of device + containing reiserfs. This function parses other options, sets flags + based on parsing and returns non-optional argument */ +static char * parse_options (int argc, char * argv []) +{ + int c; + + while (1) { + static struct option options[] = { + // mode options + {"check", no_argument, &opt_fsck_mode, FSCK_DEFAULT}, + {"rebuild-tree", no_argument, &opt_fsck_mode, FSCK_REBUILD}, + {"search-key", no_argument, &opt_fsck_mode, FSCK_FIND_ITEM}, + + // journal replay options + {"replay-by-mount", no_argument, &opt_journal_replay, REPLAY_DEFAULT}, + {"no-journal-replay", no_argument, &opt_journal_replay, NO_REPLAY}, + {"replay-whole-journal", no_argument, &opt_journal_replay, REPLAY_ALL}, + + // stop point options + {"do-not-stop", no_argument, &opt_stop_point, STOP_DEFAULT}, + {"stop-after-replay", no_argument, &opt_stop_point, STOP_AFTER_REPLAY}, + {"stop-after-pass1", no_argument, &opt_stop_point, STOP_AFTER_PASS1}, + {"stop-after-pass2", no_argument, &opt_stop_point, STOP_AFTER_PASS2}, + {"stop-after-semantic-pass", no_argument, &opt_stop_point, STOP_AFTER_SEMANTIC}, + + // scanned area option + {"scan-used-part-only", no_argument, &opt_what_to_scan, SCAN_USED_PART}, + {"scan-whole-partition", no_argument, &opt_what_to_scan, SCAN_WHOLE_PARTITION}, + + // lost+found + {"no-lost+found", no_argument, &opt_lost_found, NO_LOST_FOUND}, + {"lost+found", no_argument, &opt_lost_found, DO_LOST_FOUND}, + {0, 0, 0, 0} + }; + int option_index; + + c = getopt_long (argc, argv, "yapv", options, &option_index); + if (c == -1) + break; + + switch (c) { + case 0: + switch (option_index) { + case 0: /* check */ + case 1: /* rebuild */ + case 2: /* find */ + break; + + case 3: /* replay by mount */ + case 4: /* no journal replay */ + case 
5: /* replay whole journal */ + break; + + case 6: /* do not stop */ + case 7: /* stop after replay */ + case 8: /* stop after pass 1 */ + case 9: /* stop after pass 2 */ + case 10: /* stop after semantic */ + break; + case 11: /* scan used part of partition */ + case 12: /* scan whole partition */ + break; + + } + break; + + case 'y': + case 'p': /* these do nothing */ + case 'r': + break; + + case 'a': + opt_fsck = 1; + break; + + case 'v': + /* output fsck statistics to stdout on exit */ + opt_verbose = 1; + break; + + default: + print_usage_and_exit(); + } + } + + if (optind != argc - 1) + /* only one non-option argument is permitted */ + print_usage_and_exit(); + + return argv[optind]; +} + + +struct super_block g_sb; +struct buffer_head * g_sbh; +struct reiserfs_super_block * g_old_rs; + + + +static void reset_super_block (struct super_block * s) +{ + unsigned long * oids; + + g_old_rs = (struct reiserfs_super_block *)getmem (s->s_blocksize); + memcpy (g_old_rs, SB_BUFFER_WITH_SB (s)->b_data, s->s_blocksize); + + /* reset few fields in */ + SB_FREE_BLOCKS (s) = SB_BLOCK_COUNT (s); + SB_TREE_HEIGHT (s) = ~0; + SB_ROOT_BLOCK (s) = ~0; + s->u.reiserfs_sb.s_mount_state = REISERFS_ERROR_FS; + s->u.reiserfs_sb.s_rs->s_oid_cursize = 2; + oids = (unsigned long *)(s->u.reiserfs_sb.s_rs + 1); + if (oids[0] != 1) { + printf ("reset_super_block: invalid objectid map\n"); + oids[0] = 1; + } + oids[1] = 2; + s->s_dirt = 1; + + mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 0); +} + +static void update_super_block (void) +{ + SB_REISERFS_STATE (&g_sb) = REISERFS_VALID_FS; + + reset_journal (&g_sb); + + mark_buffer_dirty (SB_BUFFER_WITH_SB (&g_sb), 0); +} + + +char ** g_disk_bitmap; +char ** g_new_bitmap; +char ** g_uninsertable_leaf_bitmap; +char ** g_formatted; +char ** g_unformatted; +int g_blocks_to_read; + + +/* read bitmaps (new or old format), create data blocks for new + bitmap, mark non-data blocks in it (skipped, super block, journal + area, bitmaps) used, create other 
auxiliary bitmaps */ +static void init_bitmaps (struct super_block * s) +{ + int i, j; + + /* read disk bitmap */ + if (uread_bitmaps (s)) + die ("init_bitmap: unable to read bitmap"); + + g_disk_bitmap = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) { + g_disk_bitmap[i] = SB_AP_BITMAP (s)[i]->b_data; + + if (opt_what_to_scan == SCAN_WHOLE_PARTITION) + /* mark all blocks busy */ + memset (g_disk_bitmap[i], 0xff, s->s_blocksize); + } + + + /* g_blocks_to_read is used to report progress */ + if (opt_what_to_scan == SCAN_WHOLE_PARTITION) + /* all blocks will be scanned */ + g_blocks_to_read = SB_BLOCK_COUNT (s); + else { + /* blocks marked used in bitmap will be scanned */ + g_blocks_to_read = 0; + for (i = 0; i < SB_BMAP_NR (s); i ++) { + for (j = 0; j < s->s_blocksize * 8; j ++) + if (i * s->s_blocksize * 8 + j < SB_BLOCK_COUNT (s) && + test_bit (j, SB_AP_BITMAP (s)[i]->b_data)) + g_blocks_to_read ++; + } + } + + /* this bitmap will contain valid bitmap when fsck will have done */ + g_new_bitmap = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) + g_new_bitmap[i] = getmem (s->s_blocksize); + + /* mark skipped blocks and super block used */ + for (i = 0; i <= SB_BUFFER_WITH_SB (s)->b_blocknr; i ++) + mark_block_used (i); + + /* mark bitmap blocks as used */ + for (i = 0; i < SB_BMAP_NR (s); i ++) + mark_block_used (SB_AP_BITMAP (s)[i]->b_blocknr); + + /* mark journal area as used */ + for (i = 0; i < JOURNAL_BLOCK_COUNT + 1; i ++) + mark_block_used (i + get_journal_start (s)); + + /* fill by 1s the unused part of last bitmap */ + if (SB_BLOCK_COUNT (s) % (s->s_blocksize * 8)) + for (j = SB_BLOCK_COUNT (s) % (s->s_blocksize * 8); j < s->s_blocksize * 8; j ++) + set_bit (j, g_new_bitmap[SB_BMAP_NR (s) - 1]); + + /* allocate space for bitmap of uninsertable leaves */ + g_uninsertable_leaf_bitmap = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) { + 
g_uninsertable_leaf_bitmap[i] = getmem (s->s_blocksize); + memset (g_uninsertable_leaf_bitmap[i], 0xff, s->s_blocksize); + } + + /* bitmap of formatted nodes */ + g_formatted = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) { + g_formatted[i] = getmem (s->s_blocksize); + memset (g_formatted[i], 0, s->s_blocksize); + } + /* bitmap of unformatted nodes */ + g_unformatted = getmem (sizeof (char *) * SB_BMAP_NR (s)); + for (i = 0; i < SB_BMAP_NR (s); i ++) { + g_unformatted[i] = getmem (s->s_blocksize); + memset (g_unformatted[i], 0, s->s_blocksize); + } +} + + +/* write bitmaps and brelse them */ +static void update_bitmap (struct super_block * s) +{ + int i; + + /* journal area could be used, reset it */ + for (i = 0; i < get_journal_start (s) + get_journal_size (s) + 1; i ++) + if (!is_block_used (i)) + mark_block_used (i); + + for (i = 0; i < SB_BMAP_NR (s); i ++) { + + /* copy newly built bitmap to cautious bitmap */ + memcpy (SB_AP_BITMAP (s)[i]->b_data, g_new_bitmap[i], s->s_blocksize); + mark_buffer_dirty (SB_AP_BITMAP (s)[i], 0); + bwrite (SB_AP_BITMAP (s)[i]); + + + freemem (g_new_bitmap[i]); + /* g_disk_bitmap[i] points to corresponding cautious bitmap's b_data */ + freemem (g_uninsertable_leaf_bitmap[i]); + } + + freemem (g_disk_bitmap); + freemem (g_new_bitmap); + freemem (g_uninsertable_leaf_bitmap); + +} + + +static void release_bitmap (void) +{ + int i; + + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) { + brelse (SB_AP_BITMAP (&g_sb)[i]); + } +} + +static void release_super_block (void) +{ + bwrite (SB_BUFFER_WITH_SB (&g_sb)); + freemem (SB_AP_BITMAP (&g_sb)); + brelse (SB_BUFFER_WITH_SB (&g_sb)); + + freemem (g_old_rs); +} + + + +static void mount_replay (char * devname1) +{ + int retval; + char * tmpdir; + + printf ("Replaying journal.."); fflush (stdout); + + tmpdir = tmpnam (0); + if (!tmpdir || mkdir (tmpdir, 0644)) + die ("replay_journal: tmpnam or mkdir failed: %s", strerror (errno)); + + retval = mount (devname1, 
tmpdir, "reiserfs", MS_MGC_VAL, "replayonly"); + if (retval != -1 || errno != EINVAL) { + printf ("\nMAKE SURE, THAT YOUR KERNEL IS ABLE TO MOUNT REISERFS\n"); + die ("replay_journal: mount returned unexpected value: %s", + strerror (errno)); + } + + if (rmdir (tmpdir) == -1) + die ("replay_journal: rmdir failed: %s", strerror (errno)); + + printf ("ok\n"); fflush (stdout); +} + + +static inline int nothing_todo (struct super_block * s) +{ + if (opt_fsck) + return 1; + return 0; +} + + +void write_dirty_blocks (void) +{ + fsync_dev (0); +} + + +#define WARNING \ +"Don't run this program unless something is broken. You may want\n\ +to backup first. Some types of random FS damage can be recovered\n\ +from by this program, which basically throws away the internal nodes\n\ +of the tree and then reconstructs them. This program is for use only\n\ +by the desperate, and is of only beta quality. Email\n\ +reiserfs@devlinux.com with bug reports. \n" + +/* + warning #2 + you seem to be running this automatically. you are almost + certainly doing it by mistake as a result of some script that + doesn't know what it does. doing nothing, rerun without -p if you + really intend to do this. 
*/ + +void warn_what_will_be_done (void) +{ + char * answer = 0; + size_t n = 0; + + /* warn about fsck mode */ + switch (opt_fsck_mode) { + case FSCK_DEFAULT: + printf ("Will read-only check consistency of the partition\n"); + break; + + case FSCK_REBUILD: + printf (WARNING); + break; + + case FSCK_FIND_ITEM: + printf ("Will look for the item with key\n"); + break; + } + + /* warn about replay */ + switch (opt_journal_replay) { + case REPLAY_DEFAULT: + printf ("Will replay just like mounting would\n"); + break; + + case REPLAY_ALL: + printf ("Will replay all valid transactions\n"); break; + + case NO_REPLAY: + printf ("Will not replay journal\n"); break; + } + + /* warn about stop point */ + switch (opt_stop_point) { + case STOP_AFTER_REPLAY: + printf ("Will stop after journal replay\n"); break; + case STOP_AFTER_PASS1: + printf ("Will stop after pass 1\n"); break; + + case STOP_AFTER_PASS2: + printf ("Will stop after pass 2\n"); break; + + case STOP_AFTER_SEMANTIC: + printf ("Will stop after semantic pass\n"); break; + } + + + /* warn about scanned area */ + if (opt_what_to_scan == SCAN_WHOLE_PARTITION) + printf ("Will scan whole partition\n"); + + printf ("Do you want to run this " + "program?[N/Yes] (note need to type Yes):"); + if (getline (&answer, &n, stdin) != 4 || strcmp ("Yes\n", answer)) { + exit (0); + } + + if (opt_fsck_mode == FSCK_FIND_ITEM) { + printf ("Specify key to search:"); + if (scanf ("%d %d %d %d", &(key_to_find.k_dir_id), &(key_to_find.k_objectid), + &(key_to_find.k_offset), &(key_to_find.k_uniqueness)) != 4) + die ("parse_options: specify a key through stdin"); + } +} + + +void end_fsck (char * file_name) +{ + update_super_block (); + update_bitmap (&g_sb); + release_bitmap (); + release_super_block (); + + if (opt_verbose == 1) + output_information (); + + printf ("Syncing.."); fflush (stdout); + + write_dirty_blocks (); + sync (); + + printf ("done\n"); fflush (stdout); + + if (opt_verbose == 1) + printf ("Checking mem.."); + + 
free_overwritten_unfms (); + check_and_free_buffer_mem (); + + if (opt_verbose == 1) + printf ("done\n"); + + if (opt_fsck == 1) + printf("ReiserFS : done checking %s\n", file_name); + else + printf ("Ok\n"); + exit (0); +} + + +static void open_device (char * file_name, int flag) +{ + g_sb.s_dev = open (file_name, flag); + if (g_sb.s_dev == -1) + die ("reiserfsck: can not open '%s': %s", file_name, strerror (errno)); +} + +static void reopen_read_only (char * file_name) +{ + close (g_sb.s_dev); + open_device (file_name, O_RDONLY); +} + +static void reopen_read_write (char * file_name) +{ + close (g_sb.s_dev); + open_device (file_name, O_RDWR); +} + + +/* ubitmap.c: */extern int from_journal; + + +int main (int argc, char * argv []) +{ + char * file_name; + + if (opt_fsck == 0) + printf ("\n\n<-----------REISERFSCK, 1999----------->\n\n"); + + + file_name = parse_options (argc, argv); + if (is_mounted (file_name)) + /* do not work on mounted filesystem for now */ + die ("reiserfsck: '%s' contains a mounted file system\n", file_name); + + + warn_what_will_be_done (); /* and ask confirmation Yes */ + + + if (opt_journal_replay == REPLAY_DEFAULT) + mount_replay (file_name); + + open_device (file_name, O_RDONLY); + + if (uread_super_block (&g_sb)) + die ("reiserfsck: no reiserfs found"); + + if (opt_journal_replay == REPLAY_ALL) { + /* read-write permissions are needed */ + reopen_read_write (file_name); + replay_all (&g_sb); + reopen_read_only (file_name); + } + + + if (nothing_todo (&g_sb)) { + /* this should work when fsck is called by fsck -a */ + printf ("%s: clean, %d/%d %ldk blocks\n", file_name, + SB_BLOCK_COUNT (&g_sb) - SB_FREE_BLOCKS(&g_sb), SB_BLOCK_COUNT (&g_sb), g_sb.s_blocksize / 1024); + brelse (SB_BUFFER_WITH_SB (&g_sb)); + return 0; + } + + + if (opt_fsck_mode == FSCK_DEFAULT) { + check_fs_tree (&g_sb); + release_bitmap (); + release_super_block (); + check_and_free_buffer_mem (); + exit (0); + } + + if (opt_stop_point == STOP_AFTER_REPLAY) { + 
release_super_block (); + check_and_free_buffer_mem (); + exit (0); + } + + + if (opt_fsck_mode == FSCK_REBUILD) { + reopen_read_write (file_name); + + if (opt_fsck == 1) + printf ("ReiserFS : checking %s\n",file_name); + else + printf ("Rebuilding..\n"); + + reset_super_block (&g_sb); + init_bitmaps (&g_sb); + + /* make file system invalid unless fsck done */ + SB_REISERFS_STATE (&g_sb) = REISERFS_ERROR_FS; + bwrite (SB_BUFFER_WITH_SB (&g_sb)); + /* 1,2. building of the tree */ + build_the_tree (); + + /* 3. semantic pass */ + semantic_pass (); + + /* if --lost+found is set - link unaccessed directories to + lost+found directory */ + pass4 (&g_sb); + + /* 4. look for unaccessed items in the leaves */ + check_unaccessed_items (); + + + if (from_journal) + /* blocks from journal area could get into tree, fix that */ + release_journal_blocks (&g_sb); + + end_fsck (file_name); + } + + + if (opt_fsck_mode == FSCK_FIND_ITEM) { + init_bitmaps (&g_sb); + build_the_tree (); + release_bitmap (); + release_super_block (); + check_and_free_buffer_mem (); + exit (0); + } + + + return 0; +} diff -u -r --new-file linux/fs/reiserfs/utils/fsck/noname.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/noname.c --- linux/fs/reiserfs/utils/fsck/noname.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/noname.c Tue May 11 16:17:29 1999 @@ -0,0 +1,309 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser + */ + +/*#include <stdio.h>*/ +/*#include <string.h>*/ +/*#include <sys/types.h>*/ +/*#include <asm/bitops.h> +#include "../include/reiserfs_fs.h" +#include "../include/reiserfs_fs_sb.h" +#include "../include/reiserfslib.h"*/ +#include "fsck.h" + + +void get_max_buffer_key (struct buffer_head * bh, struct key * key) +{ + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, B_NR_ITEMS (bh) - 1); + copy_key (key, &(ih->ih_key)); + + if (KEY_IS_DIRECTORY_KEY (key)) { + /* copy 3-rd and 4-th key components of the last entry */ + key->k_offset = B_I_DEH (bh, ih)[I_ENTRY_COUNT (ih) 
- 1].deh_offset; + key->k_uniqueness = DIRENTRY_UNIQUENESS; + } else if (!KEY_IS_STAT_DATA_KEY (key)) + /* get key of the last byte, which is contained in the item */ + key->k_offset += I_BYTES_NUMBER (ih, bh->b_size) - 1; + +} + + +#if 0 +int are_items_mergeable (struct item_head * left, struct item_head * right, int bsize) +{ + if (comp_keys (&left->ih_key, &right->ih_key) != SECOND_GREATER) { + print_key (&(left->ih_key)); + print_key (&(right->ih_key)); + die ("are_items_mergeable: second key is not greater"); + } + + if (comp_short_keys (&left->ih_key, &right->ih_key) != KEYS_IDENTICAL) + return NO; + + if (I_IS_DIRECTORY_ITEM (left)) { + if (!I_IS_DIRECTORY_ITEM (right)) + die ("are_items_mergeable: right item must be of directory type"); + return 1; + } + + if ((I_IS_DIRECT_ITEM (left) && I_IS_DIRECT_ITEM (right)) || + (I_IS_INDIRECT_ITEM (left) && I_IS_INDIRECT_ITEM (right))) + return (left->ih_key.k_offset + I_BYTES_NUMBER (left, bsize) == right->ih_key.k_offset) ? 1 : 0; + + return 0; +} + + +static void decrement_key (struct key * key) +{ + unsigned long * key_field = (unsigned long *)key + REISERFS_FULL_KEY_LEN - 1; + int i; + + for (i = 0; i < REISERFS_FULL_KEY_LEN; i ++, key_field--) + if (*key_field) { + (*key_field)--; + break; + } + + if (i == REISERFS_FULL_KEY_LEN) + die ("decrement_key: zero key found"); +} + + +/* get left neighbor of the leaf node */ +static struct buffer_head * get_left_neighbor (struct path * path) +{ + struct key key; + struct path path_to_left_neighbor; + struct buffer_head * bh; + + copy_key (&key, B_N_PKEY (PATH_PLAST_BUFFER (path), 0)); + decrement_key (&key); + + reiserfsck_search_by_key (&g_sb, &key, &path_to_left_neighbor, comp_keys); + if (PATH_LAST_POSITION (&path_to_left_neighbor) == 0) { + pathrelse (&path_to_left_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_left_neighbor); + bh->b_count ++; + pathrelse (&path_to_left_neighbor); + return bh; +} + + +int is_left_mergeable (struct path * path) +{ + 
struct item_head * right; + struct buffer_head * bh; + int retval; + + right = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), 0); + + bh = get_left_neighbor (path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (B_N_PITEM_HEAD (bh, B_NR_ITEMS (bh) - 1), right, bh->b_size); + brelse (bh); + return retval; +} + + +static struct buffer_head * get_right_neighbor (struct path * path) +{ + struct key key; + struct key * rkey; + struct path path_to_right_neighbor; + struct buffer_head * bh; + struct key maxkey = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; + + rkey = get_right_dkey (path); + if (rkey == 0) + copy_key (&key, &maxkey); + else + copy_key (&key, rkey); + + reiserfsck_search_by_key (&g_sb, &key, &path_to_right_neighbor, comp_keys); + if (PATH_PLAST_BUFFER (&path_to_right_neighbor) == PATH_PLAST_BUFFER (path)) { + pathrelse (&path_to_right_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_right_neighbor); + bh->b_count ++; + pathrelse (&path_to_right_neighbor); + return bh; +} + + +int is_right_mergeable (struct path * path) +{ + struct item_head * left; + struct buffer_head * bh; + int retval; + + left = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1); + + bh = get_right_neighbor (path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (left, B_N_PITEM_HEAD (bh, 0), bh->b_size); + brelse (bh); + return retval; +} + +#endif /*0*/ + + +#if 0 +/* retunrs 1 if buf looks like a leaf node, 0 otherwise */ +static int is_leaf (char * buf) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + + blkh = (struct block_head *)buf; + ih = (struct item_head *)(buf + BLKH_SIZE) + blkh->blk_nr_item - 1; + used_space = BLKH_SIZE + IH_SIZE * blkh->blk_nr_item + (g_sb.s_blocksize - ih->ih_item_location); + if (used_space != g_sb.s_blocksize - blkh->blk_free_space) + return 0; + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = 
g_sb.s_blocksize; + for (i = 0; i < blkh->blk_nr_item; i ++, ih ++) { + if (ih->ih_item_location >= g_sb.s_blocksize || ih->ih_item_location < IH_SIZE * blkh->blk_nr_item) + return 0; + if (ih->ih_item_len < 1 || ih->ih_item_len > MAX_ITEM_LEN (g_sb.s_blocksize)) + return 0; + if (prev_location - ih->ih_item_location != ih->ih_item_len) + return 0; + prev_location = ih->ih_item_location; + } + + return 1; +} + + +/* retunrs 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf) +{ + struct block_head * blkh; + int used_space; + + blkh = (struct block_head *)buf; + used_space = BLKH_SIZE + KEY_SIZE * blkh->blk_nr_item + DC_SIZE * (blkh->blk_nr_item + 1); + if (used_space != g_sb.s_blocksize - blkh->blk_free_space) + return 0; + return 1; +} + + +/* sometimes unfomatted node looks like formatted, if we check only + block_header. This is the reason, why it is so complicated. We + believe only when free space and item locations are ok + */ +int not_formatted_node (char * buf) +{ + struct block_head * blkh; + + blkh = (struct block_head *)buf; + + if (blkh->blk_level < DISK_LEAF_NODE_LEVEL || blkh->blk_level > MAX_HEIGHT) + /* blk_level is out of range */ + return 1; + + if (blkh->blk_nr_item < 1 || blkh->blk_nr_item > (g_sb.s_blocksize - BLKH_SIZE) / IH_SIZE) + /* item number is out of range */ + return 1; + + if (blkh->blk_free_space > g_sb.s_blocksize - BLKH_SIZE - IH_SIZE) + /* free space is out of range */ + return 1; + + /* check format of nodes, such as we are not sure, that this is formatted node */ + if (blkh->blk_level == DISK_LEAF_NODE_LEVEL) + return (is_leaf (buf) == 1) ? 0 : 1; + return (is_internal (buf) == 1) ? 
0 : 1; +} + + +int is_internal_node (char * buf) +{ + struct block_head * blkh; + + blkh = (struct block_head *)buf; + if (blkh->blk_level != DISK_LEAF_NODE_LEVEL) + return 1; + return 0; +} + +#endif /*0*/ + +/* +int ready_preserve_list (struct tree_balance * tb, struct buffer_head * bh) +{ + return 0; +} + + +void preserve_shifted ( + struct tree_balance * tb, + struct buffer_head **bh, + struct buffer_head * parent, + int position, + struct buffer_head * dest) +{ + return; +} +*/ + +#if 0 + +char * strs[] = +{"0%",".",".",".",".","20%",".",".",".",".","40%",".",".",".",".","60%",".",".",".",".","80%",".",".",".",".","100%"}; + +char progress_to_be[1024]; +char current_progress[1024]; + +void str_to_be (char * buf, int prosents) +{ + int i; + prosents -= prosents % 4; + buf[0] = 0; + for (i = 0; i <= prosents / 4; i ++) + strcat (buf, strs[i]); +} + + +void print_how_far (unsigned long * passed, unsigned long total) +{ + int n; + + if (*passed == 0) + current_progress[0] = 0; + + if (*passed >= total) { + printf/*die*/ ("print_how_far: total %lu has been reached already. 
cur=%lu\n", total, ++(*passed)); + return; + } + + (*passed) ++; + n = ((double)((double)(*passed) / (double)total) * (double)100); + + str_to_be (progress_to_be, n); + + if (strlen (current_progress) != strlen (progress_to_be)) + printf ("%s", progress_to_be + strlen (current_progress)); + + strcat (current_progress, progress_to_be + strlen (current_progress)); + + + fflush (stdout); +} +#endif + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/pass1.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass1.c --- linux/fs/reiserfs/utils/fsck/pass1.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass1.c Sun May 14 23:37:11 2000 @@ -0,0 +1,487 @@ +/* + * Copyright 1996-2000 Hans Reiser + */ +#include "fsck.h" +#include "reiserfs.h" +#include <stdlib.h> + + + +/* allocates buffer head and copy buffer content */ +static struct buffer_head * make_buffer (int dev, int blocknr, int size, char * data) +{ + struct buffer_head * bh; + + bh = getblk (dev, blocknr, size); + if (buffer_uptodate (bh)) + die ("make_buffer: uptodate buffer found"); + memcpy (bh->b_data, data, size); + set_bit (BH_Uptodate, (char *)&bh->b_state); + return bh; +} + + +void find_a_key (struct key * key, struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + for (i = 0; i < B_NR_ITEMS (bh); i ++) { + ih = B_N_PITEM_HEAD (bh, i); + if (comp_short_keys (key, &(ih->ih_key))) + continue; + reiserfs_warning ("\nblock %d contains key %k (item %d)\n", bh->b_blocknr, key, i); + return; + } +} + + +/* analyse contents of indirect items. 
If it points to used blocks or + to uninsertable node, which has to be inserted by items - we free + those slots (putting 0-s), if not - mark pointed blocks as used */ +static void handle_indirect_items (struct buffer_head * bh) +{ + int i, j; + struct item_head * ih; + + for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) { + if (I_IS_INDIRECT_ITEM(ih)) { + __u32 * unp; + + /* check each pointer to unformatted node, if it is in the tree already, put 0 here */ + unp = (__u32 *)B_I_PITEM (bh, ih); + for (j = 0; j < ih->ih_item_len / UNFM_P_SIZE; j ++) { + if (unp[j] >= SB_BLOCK_COUNT (&g_sb) || /* invalid data block */ + !was_block_used (unp[j]) || /* block is marked free in on + disk bitmap */ + is_block_used (unp[j]) || /* that is either it looked + like leaf or other indirect + item contains this pointer + already */ + is_block_uninsertable (unp[j])) { /* block contains leaf + node, its insertion + has been postponed */ + unp[j] = 0; + mark_buffer_dirty (bh, 0); + continue; + } + /* ok, mark that block is in tree and that it is unformatted node */ + mark_block_used (unp[j]); + + /* this is for check only */ + mark_block_unformatted (unp[j]); + } + } + } +} + +int g_unaccessed_items = 0; + +int is_item_accessed (struct item_head * ih) +{ + return (ih->ih_reserved == 0) ? 1 : 0; +} + + +void mark_item_unaccessed (struct item_head * ih) +{ + g_unaccessed_items ++; + ih->ih_reserved = MAX_US_INT; +} + + +void mark_item_accessed (struct item_head * ih, struct buffer_head * bh) +{ + g_unaccessed_items --; + ih->ih_reserved = 0; + mark_buffer_dirty (bh, 0); +} + + +/* used when leaf is inserted into tree by pointer + 1. set sd_nlinks to 0 in all stat data items + 2. 
mark all items as unaccessed + */ +static void reset_nlinks (struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, 0); + for (i = 0; i < B_NR_ITEMS (bh); i ++, ih ++) { + mark_item_unaccessed (ih); + if (I_IS_STAT_DATA_ITEM (ih)) { + add_event (STAT_DATA_ITEMS); + B_I_STAT_DATA (bh, ih)->sd_nlink = 0; + } + } + + mark_buffer_dirty (bh, 0); +} + + +static void insert_pointer (struct buffer_head * bh, struct path * path) +{ + struct tree_balance tb; + struct item_head * ih; + char * body; + int memmode; + int zeros_number; + + init_tb_struct (&tb, &g_sb, path, 0x7fff); + tb.preserve_mode = NOTHING_SPECIAL; + + /* fix_nodes & do_balance must work for internal nodes only */ + ih = 0; + path->pos_in_item = PATH_LAST_POSITION (path); /* not needed */ + if (fix_nodes (M_INTERNAL, &tb, ih) != CARRY_ON) + die ("insert_pointer: no free space on device"); + + /* child_pos: we insert after position child_pos: this feature of the insert_child */ + /* there is special case: we insert pointer after + (-1)-st key (before 0-th key) in the parent */ + if (PATH_LAST_POSITION (path) == 0 && path->pos_in_item == 0) + PATH_H_B_ITEM_ORDER (path, 0) = -1; + else { + if (PATH_H_PPARENT (path, 0) == 0) + PATH_H_B_ITEM_ORDER (path, 0) = 0; +/* PATH_H_B_ITEM_ORDER (path, 0) = PATH_H_PPARENT (path, 0) ? PATH_H_B_ITEM_ORDER (path, 0) : 0;*/ + } + + ih = 0; + body = (char *)bh; + memmode = 0; + zeros_number = 0; + + do_balance (&tb, ih, body, M_INTERNAL, zeros_number); + + /* mark as used block itself and pointers to unformatted nodes */ + mark_block_used (bh->b_blocknr); + + /* this is for check only */ + mark_block_formatted (bh->b_blocknr); + reset_nlinks (bh); + handle_indirect_items (bh); + + /* statistic */ + add_event (GOOD_LEAVES); + +} + + +/* return 1 if left and right can be joined. 
0 otherwise */ +int balance_condition_fails (struct buffer_head * left, struct buffer_head * right) +{ + if (B_FREE_SPACE (left) >= B_CHILD_SIZE (right) - + (are_items_mergeable (B_N_PITEM_HEAD (left, B_NR_ITEMS (left) - 1), B_N_PITEM_HEAD (right, 0), left->b_size) ? IH_SIZE : 0)) + return 1; + return 0; +} + + +/* return 1 if new can be joined with last node on the path or with + its right neighbor, 0 otherwise */ +int balance_condition_2_fails (struct buffer_head * new, struct path * path) +{ + struct buffer_head * bh; + struct key * right_dkey; + int pos, used_space; + struct path path_to_right_neighbor; + + bh = PATH_PLAST_BUFFER (path); + + + if (balance_condition_fails (bh, new)) + /* new node can be joined with last buffer on the path */ + return 1; + + /* new node can not be joined with its left neighbor */ + + right_dkey = uget_rkey (path); + if (right_dkey == 0) + /* there is no right neighbor */ + return 0; + + pos = PATH_H_POSITION (path, 1); + if (pos == B_NR_ITEMS (bh = PATH_H_PBUFFER (path, 1))) { + /* we have to read parent of right neighbor. For simplicity we + call search_by_key, which will read right neighbor as well */ + init_path (&path_to_right_neighbor); + if (usearch_by_key (&g_sb, right_dkey, &path_to_right_neighbor, 0, + DISK_LEAF_NODE_LEVEL, 0, comp_keys) != ITEM_FOUND) + die ("get_right_neighbor_free_space: invalid right delimiting key"); + used_space = B_CHILD_SIZE (PATH_PLAST_BUFFER (&path_to_right_neighbor)); + pathrelse (&path_to_right_neighbor); + } + else + used_space = B_N_CHILD (bh, pos + 1)->dc_size; + + + if (B_FREE_SPACE (new) >= used_space - + (are_items_mergeable (B_N_PITEM_HEAD (new, B_NR_ITEMS (new) - 1), (struct item_head *)right_dkey, new->b_size) ? 
IH_SIZE : 0)) + return 1; + + return 0; +} + + +static void get_max_buffer_key (struct buffer_head * bh, struct key * key) +{ + struct item_head * ih; + + ih = B_N_PITEM_HEAD (bh, B_NR_ITEMS (bh) - 1); + copy_key (key, &(ih->ih_key)); + + if (KEY_IS_DIRECTORY_KEY (key)) { + /* copy 3-rd and 4-th key components of the last entry */ + key->k_offset = B_I_DEH (bh, ih)[I_ENTRY_COUNT (ih) - 1].deh_offset; + key->k_uniqueness = DIRENTRY_UNIQUENESS; + } else if (!KEY_IS_STAT_DATA_KEY (key)) + /* get key of the last byte, which is contained in the item */ + key->k_offset += I_BYTES_NUMBER (ih, bh->b_size) - 1; + +} + + + +/* inserts pointer to leaf into tree if possible. If not, marks node as uninsrtable */ +static void try_to_insert_pointer_to_leaf (struct buffer_head * new_bh) +{ + struct path path; + struct buffer_head * bh; /* last path buffer */ + struct key * first_bh_key, last_bh_key; /* first and last keys of new buffer */ + struct key last_path_buffer_last_key, * right_dkey; + int ret_value; + + path.path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + + first_bh_key = B_N_PKEY (new_bh, 0); + + ret_value = usearch_by_key (&g_sb, first_bh_key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys); + if (ret_value == KEY_FOUND) + goto cannot_insert; + + + /* get max key in the new node */ + get_max_buffer_key (new_bh, &last_bh_key); + bh = PATH_PLAST_BUFFER (&path); + if (comp_keys ((unsigned long *)B_N_PKEY (bh, 0), (unsigned long *)&last_bh_key) == FIRST_GREATER) { + /* new buffer falls before the leftmost leaf */ + if (balance_condition_fails (new_bh, bh)) + goto cannot_insert; + + if (uget_lkey (&path) != 0 || PATH_LAST_POSITION (&path) != 0) + die ("try_to_insert_pointer_to_leaf: bad search result"); + + path.pos_in_item = 0; + goto insert; + } + + /* get max key of buffer, that is in tree */ + get_max_buffer_key (bh, &last_path_buffer_last_key); + if (comp_keys (&last_path_buffer_last_key, first_bh_key) != SECOND_GREATER) + /* first key of new buffer falls in the 
middle of node that is in tree */ + goto cannot_insert; + + right_dkey = uget_rkey (&path); + if (right_dkey && comp_keys (right_dkey, &last_bh_key) != FIRST_GREATER) { + goto cannot_insert; + } + + if (balance_condition_2_fails (new_bh, &path)) + goto cannot_insert; + + insert: + insert_pointer (new_bh, &path); + + goto out; + + cannot_insert: + /* statistic */ + add_event (UNINSERTABLE_LEAVES); + mark_block_uninsertable (new_bh->b_blocknr); + + out: + pathrelse (&path); + brelse (new_bh); + return; +} + + + + +static int tree_is_empty (void) +{ + return (SB_ROOT_BLOCK (&g_sb) == ~0) ? 1 : 0; +} + + +static void make_single_leaf_tree (struct buffer_head * bh) +{ + /* tree is empty, make tree root */ + SB_ROOT_BLOCK (&g_sb) = bh->b_blocknr; + SB_TREE_HEIGHT (&g_sb) = 2; + + mark_block_used (bh->b_blocknr); + + /* this is for check only */ + mark_block_formatted (bh->b_blocknr); + + /* set stat data nlinks fields to 0, mark all items as unaccessed, analyse contents of indirect + items */ + reset_nlinks (bh); + handle_indirect_items (bh); + + /* statistic */ + add_event (GOOD_LEAVES); + + brelse (bh); +} + + +/* reads the device by set of 8 blocks, takes leaves and tries to + insert them into tree */ +void build_the_tree (void) +{ + int i, j, k; + struct buffer_head * bbh, * bh; + __u32 handled_blocks = 0; + struct si * saved_items = 0; + + if ( opt_fsck == 0 ) + fprintf (stderr, "Pass 1 - "); + + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) + for (j = 0; j < g_sb.s_blocksize; j ++) { + /* make sure, that we are not out of the device */ + if (i * g_sb.s_blocksize * 8 + j * 8 == SB_BLOCK_COUNT (&g_sb)) + goto out_of_bitmap; + + if (i * g_sb.s_blocksize * 8 + j * 8 + 8 > SB_BLOCK_COUNT (&g_sb)) + die ("build_the_tree: Out of bitmap"); + + if (SB_AP_CAUTIOUS_BITMAP (&g_sb)[i]->b_data[j] == 0) { + /* all blocks are free */ + continue; + } + + bbh = bread (g_sb.s_dev, i * g_sb.s_blocksize + j, g_sb.s_blocksize * 8); + for (k = 0; k < 8; k ++) { + unsigned long block; + + if 
((SB_AP_CAUTIOUS_BITMAP (&g_sb)[i]->b_data[j] & (1 << k)) == 0) { + /* k-th block is free */ + continue; + } + + block = i * g_sb.s_blocksize * 8 + j * 8 + k; + if ( opt_fsck == 0 ) + print_how_far (&handled_blocks, g_blocks_to_read); + + if (not_formatted_node (bbh->b_data + k * g_sb.s_blocksize, g_sb.s_blocksize)) + continue; + if (is_internal_node (bbh->b_data + k * g_sb.s_blocksize) == 1) { + if (!is_block_used (block)) + reiserfs_free_internal_block (&g_sb, i * g_sb.s_blocksize * 8 + j * 8 + k); + else + /* block is used in new tree already. There was an + indirect item, pointing to it. We keep information + about it for check only */ + /*mark_formatted_pointed_by_indirect (block)*/; + + continue; + } + + /* leaf node found */ + bh = make_buffer (g_sb.s_dev, block, g_sb.s_blocksize, bbh->b_data + k * g_sb.s_blocksize); + + /* */ + if (opt_find) { + find_a_key (&key_to_find, bh); + brelse (bh); + continue; + } + + if (block <= SB_AP_CAUTIOUS_BITMAP (&g_sb)[SB_BMAP_NR (&g_sb) - 1]->b_blocknr) { + /* block is neither bitmap block nor super block*/ + brelse (bh); + continue; + } + + if (is_block_used (block)) { + /* block is used in new tree already. There was an indirect + item, pointing to it. 
We keep information about it for + check only */ +/* mark_formatted_pointed_by_indirect (block);*/ + + add_event (LEAVES_USED_BY_INDIRECT_ITEMS); + /* Rather than try to find UNP to this block we save its + items and will put them into tree at the end of pass 1 */ + for_all_items_in_node (save_item, &saved_items, bh); + brelse (bh); + continue; + } + + if (is_leaf_bad (bh)) { + /* leaf is bad: directory item structure corrupted, or something else */ +/* mark_formatted_pointed_by_indirect (block);*/ + if (opt_verbose) + reiserfs_warning ("\nbuild_the_tree: bad leaf encountered: %lu\n", bh->b_blocknr); + add_event (LEAVES_USED_BY_INDIRECT_ITEMS); + /* Save good items only to put them into tree at the end of this pass */ + for_all_items_in_node (save_item, &saved_items, bh); + brelse (bh); + continue; + } + + if (tree_is_empty () == 1) { + make_single_leaf_tree (bh); + continue; + } + + /* if the leaf node can not be inserted into tree by pointer, + we postpone its insertion at the end of the pass 1 */ + try_to_insert_pointer_to_leaf (bh); + + if (opt_check == 1) + reiserfsck_check_pass1 (); + } + + bforget (bbh); + } + + + out_of_bitmap: + + + if (opt_find) + exit (EXIT_SUCCESS); + + /* this checks what has been built (if -c option is set) */ + reiserfsck_check_pass1 (); + + /* put saved items into tree. These items were in leaves, those + could not be inserted into tree because some indirect items point + to those leaves. 
Rather than lookup for corresponding unfm + pointers in the tree, we save items of those leaves and put them + into tree separately */ + if ( opt_fsck == 0 ) + printf ("\nPass 1a - "); + put_saved_items_into_tree (saved_items); + if ( opt_fsck == 0 ) + printf ("done\n"); + + /* end of pass 1 */ + if ( opt_fsck == 0 ) + printf ("\n"); + + /* this works only if -c specified */ + reiserfsck_check_pass1 (); + + /* pass 2 */ + take_bad_blocks_put_into_tree (); + + reiserfsck_check_pass1 (); + +} + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/pass2.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass2.c --- linux/fs/reiserfs/utils/fsck/pass2.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass2.c Sun May 14 23:37:11 2000 @@ -0,0 +1,234 @@ +/* + * Copyright 1996-1999 Hans Reiser + */ +#include "fsck.h" +#include "reiserfs.h" + +void for_all_items_in_node (action_on_item_t action, struct si ** si, struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) + action (si, ih, B_I_PITEM (bh,ih)); +#if 0 + int j; + + for (i = B_NR_ITEMS (bh) / 2, j = i + 1; ; i --, j ++) { + if (i >= 0) { + ih = B_N_PITEM_HEAD (bh, i); + action (si, ih, B_I_PITEM (bh,ih)); + } + if (j < B_NR_ITEMS (bh)) { + ih = B_N_PITEM_HEAD (bh, j); + action (si, ih, B_I_PITEM (bh,ih)); + } + +/* check_buffer_queues ();*/ + + if (i <= 0 && j >= B_NR_ITEMS (bh) - 1) + break; + } +#endif +} + + +/* insert sd item if it does not exist, overwrite it otherwise */ +static void put_sd_item_into_tree (struct item_head * comingih, char * item) +{ + struct item_head ih; + struct path path; + struct buffer_head * path_bh; + int path_item_num; + struct stat_data * psd; + + copy_key (&(ih.ih_key), &(comingih->ih_key)); + if (usearch_by_key (&g_sb, &(ih.ih_key), &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) == ITEM_FOUND) { + /* overwrite stat data in the tree */ + path_bh = PATH_PLAST_BUFFER (&path); + 
path_item_num = PATH_LAST_POSITION (&path); + psd = B_N_STAT_DATA (path_bh, path_item_num); + if (psd->sd_nlink != 0) + die ("put_sd_item_into_tree: all stat data in the tree (at this moment) must have nllinks == 0 (not %d)", + psd->sd_nlink); + if (psd->sd_mtime > ((struct stat_data *)item)->sd_mtime) { + /* new sd is newer than the found one */ + memcpy (psd, item, SD_SIZE); + psd->sd_nlink = 0; + psd->u.sd_first_direct_byte = NO_BYTES_IN_DIRECT_ITEM; + mark_buffer_dirty (PATH_PLAST_BUFFER (&path), 0); + } + pathrelse (&path); + } else { + struct stat_data sd; + + ih.ih_item_len = SD_SIZE; + set_ih_free_space (&ih, MAX_US_INT); + mark_item_unaccessed (&ih); + memcpy (&sd, item, SD_SIZE); + sd.sd_nlink = 0; + sd.u.sd_first_direct_byte = NO_BYTES_IN_DIRECT_ITEM; + reiserfsck_insert_item (&path, &ih, (const char *)&sd); + + add_event (STAT_DATA_ITEMS); + } +} + + +/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ +/* +static unsigned long get_third_component (char * name, int len) +{ + if (!len || (len == 1 && name[0] == '.')) + return DOT_OFFSET; + if (len == 2 && name[0] == '.' 
&& name[1] == '.') + return DOT_DOT_OFFSET; + return keyed_hash (name, len); +} +*/ + +static int reiserfsck_find_entry (struct key * key, struct reiserfs_de_head * deh, struct path * path) +{ + struct key entry_key; + + copy_key (&entry_key, key); + set_k_offset (&entry_key, deh->deh_offset); + set_k_type (&entry_key, TYPE_DIRENTRY); + + return usearch_by_entry_key (&g_sb, &entry_key, path); +} + + +/* this tries to put each item entry to the tree, if there is no items of + the directory, insert item containing 1 entry */ +static void put_directory_item_into_tree (struct item_head * comingih, char * item) +{ + /* struct item_head * ih;*/ + struct reiserfs_de_head * deh; + int i, retval; + struct path path; + int size; + char * buf, * entry; + struct item_head tmpih; + + /*ih = B_N_PITEM_HEAD (bh, item_num);*/ + deh = (struct reiserfs_de_head *)item;/*B_I_DEH (bh, comingih);*/ + + for (i = 0; i < I_ENTRY_COUNT (comingih); i ++, deh ++) { + entry = item + deh->deh_location; + retval = reiserfsck_find_entry (&(comingih->ih_key), deh, &path); + switch (retval) { + case ENTRY_FOUND: + pathrelse (&path); + break; + + case ENTRY_NOT_FOUND: + /* paste_into_item accepts entry to paste as buffer, beginning + with entry header and body, that follows it */ + buf = reiserfs_kmalloc (size = I_DEH_N_ENTRY_LENGTH (comingih, deh, i) + DEH_SIZE); + memcpy (buf, deh, DEH_SIZE); + ((struct reiserfs_de_head *)buf)->deh_location = 0; + memcpy (buf + DEH_SIZE, entry, size - DEH_SIZE); + + reiserfsck_paste_into_item (&path, buf, size); + + freemem (buf); + break; + + case DIRECTORY_NOT_FOUND: + buf = reiserfs_kamlloc (size = I_DEH_N_ENTRY_LENGTH (comingih, deh, i) + DEH_SIZE); + memcpy (buf, deh, DEH_SIZE); + ((struct reiserfs_de_head *)buf)->deh_location = DEH_SIZE; + memcpy (buf + DEH_SIZE, entry, size - DEH_SIZE); + copy_key (&(tmpih.ih_key), &(comingih->ih_key)); + tmpih.ih_item_len = size; + tmpih.u.ih_entry_count = 1; + mark_item_unaccessed (&tmpih); + + reiserfsck_insert_item 
(&path, &tmpih, buf); + + freemem (buf); + break; + + case REGULAR_FILE_FOUND: + /* this should never happen. */ + goto end; + } + + /*&&&&&&&&&&&&&&&&&*/ +/* reiserfsck_check_pass1 ();*/ + /*&&&&&&&&&&&&&&&&&*/ + } + end: + +} + + +/* If item is item of regular file (direct or indirect item) - this + file is in tree (with first byte) - write to it. If this file is in + tree (without first byte) - delete what we have in tree, create + file again keeping what we already had in tree this file is not in + tree - create hole at the beginning of file if necessary and write + to file */ +void put_regular_file_item_into_tree (struct item_head * ih, char * item) +{ + reiserfsck_file_write (ih, item); +} + + +void insert_item_separately (struct si ** si, struct item_head * ih, char * item) +{ + if (I_IS_STAT_DATA_ITEM (ih)) { + put_sd_item_into_tree (ih, item); + } else if (I_IS_DIRECTORY_ITEM (ih)) { + put_directory_item_into_tree (ih, item); + } else { + put_regular_file_item_into_tree (ih, item); + } + + +} + + +/* uninsertable blocks are marked by 0s in + g_uninsertable_leaf_bitmap during the pass 1. 
They still must be not in the tree */ +void take_bad_blocks_put_into_tree (void) +{ + struct buffer_head * bh; + int i, j; + __u32 bb_counter = 0; + + if ( opt_fsck == 0 ) + fprintf (stderr, "Pass 2 - "); + + + for (i = 0; i < SB_BMAP_NR (&g_sb); i ++) { + j = find_first_zero_bit (g_uninsertable_leaf_bitmap[i], g_sb.s_blocksize * 8); + while (j < g_sb.s_blocksize * 8) { + bh = bread (g_sb.s_dev, i * g_sb.s_blocksize * 8 + j, g_sb.s_blocksize); + + if (is_block_used (bh->b_blocknr)) + die ("take_bad_blocks_put_into_tree: block %d can not be in tree", bh->b_blocknr); + /* this must be leaf */ + if (not_formatted_node (bh->b_data, g_sb.s_blocksize) || is_internal_node (bh->b_data)) { + reiserfs_panic (0, "take_bad_blocks_put_into_tree: buffer (%b %z) must contain leaf", bh, bh); + } + + for_all_items_in_node (insert_item_separately, 0, bh); + + if ( opt_fsck == 0 ) + print_how_far (&bb_counter, get_event (UNINSERTABLE_LEAVES)); + + brelse (bh); + + j = find_next_zero_bit (g_uninsertable_leaf_bitmap[i], g_sb.s_blocksize * 8, j + 1); + } + } + + if (bb_counter != get_event (UNINSERTABLE_LEAVES)) + die ("take_bad_blocks_put_into_tree: found bad block %d, must be %d", + bb_counter, get_event (UNINSERTABLE_LEAVES)); + + if ( opt_fsck == 0 ) + printf ("\n"); +} diff -u -r --new-file linux/fs/reiserfs/utils/fsck/pass4.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass4.c --- linux/fs/reiserfs/utils/fsck/pass4.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/pass4.c Sun May 30 17:38:45 1999 @@ -0,0 +1,96 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser + */ +/*#include <stdio.h> +#include <time.h> +#include <stdlib.h>*/ +/*#include <asm/bitops.h> +#include "../include/reiserfs_fs.h" +#include "../include/reiserfs_fs_sb.h" +#include "../include/reiserfslib.h"*/ +#include "fsck.h" + + +static void get_next_key (struct path * path, int i, struct key * key) +{ + struct buffer_head * bh = PATH_PLAST_BUFFER (path); + struct key maxkey = {0xffffffff, 
0xffffffff, 0xffffffff, 0xffffffff}; + struct key * rkey; + + if (i < B_NR_ITEMS (bh) - 1) { + copy_key (key, B_N_PKEY (bh, i + 1)); + return; + } + + rkey = uget_rkey (path); + if (rkey) { + copy_key (key, rkey); + if (comp_keys (key, B_PRIGHT_DELIM_KEY (bh)) != KEYS_IDENTICAL) { + add_event (FIXED_RIGHT_DELIM_KEY); + copy_key (B_PRIGHT_DELIM_KEY (bh), key); + mark_buffer_dirty (bh, 0); + } + } else { + if (comp_keys (&maxkey, B_PRIGHT_DELIM_KEY (bh)) != KEYS_IDENTICAL) { + /*printf ("get_next_key: Hmm, max key not found in the tree\n");*/ + copy_key (B_PRIGHT_DELIM_KEY (bh), &maxkey); + mark_buffer_dirty (bh, 0); + } + copy_key (key, &maxkey); + } +} + + +int check_unaccessed_items () +{ + struct key key; + struct path path; + int i; + struct buffer_head * bh; + struct item_head * ih; + __u32 passed = 0; + + path.path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + copy_key (&key, &g_root_directory_key); + + if ( opt_fsck == 0 ) + fprintf (stderr, "Pass 4 - "); + + while (/*reiserfsck_*/usearch_by_key (&g_sb, &key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) == ITEM_FOUND) { + bh = PATH_PLAST_BUFFER (&path); + for (i = PATH_LAST_POSITION (&path), ih = PATH_PITEM_HEAD (&path); i < B_NR_ITEMS (bh); i ++, ih ++) { + if (is_item_accessed (ih) == 0) { + + get_next_key (&path, i, &key); + + add_event (UNACCESSED_ITEMS); + if (I_IS_STAT_DATA_ITEM (ih)) + g_fsck_info.fs_stat_data_items --; + + PATH_LAST_POSITION (&path) = i; + reiserfsck_delete_item (&path); + + goto cont; + } + if ((I_IS_STAT_DATA_ITEM (ih)) && opt_fsck == 0) { + print_how_far (&passed, get_event (STAT_DATA_ITEMS)); + } + } + get_next_key (&path, i - 1, &key); + pathrelse (&path); + +/*fu_check ();*/ + + cont: ; + } + if (key.k_dir_id != MAX_UL_INT || key.k_objectid != MAX_UL_INT || + key.k_offset != MAX_UL_INT || key.k_uniqueness != MAX_UL_INT) { + reiserfs_panic (0, "check_unaccessed_items: invalid exit key %k", &key); + } + pathrelse (&path); + + if ( opt_fsck == 0 ) + printf ("\n"); + + return 0; 
+} diff -u -r --new-file linux/fs/reiserfs/utils/fsck/reiserfsck.8 v2.4.0-test8/linux/fs/reiserfs/utils/fsck/reiserfsck.8 --- linux/fs/reiserfs/utils/fsck/reiserfsck.8 Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/reiserfsck.8 Tue May 11 16:17:29 1999 @@ -0,0 +1,60 @@ +.\" -*- nroff -*- +.\" Copyright 1996, 1997 Hans Reiser. +.\" +.TH REISERFSCK 8 "February 1999" "Reiserfs utilities" +.SH NAME +reiserfsck \- check a Linux Reiserfs file system +.SH SYNOPSIS +.B reiserfsck +[ +.B \-aprvfy +] +.I device +.SH DESCRIPTION +.B reiserfsck +is used to perform a consistency check for the Linux Reiserfs file +system. +.TP +.I device +is the special file corresponding to the device (e.g /dev/hdXX for +IDE disk partition or /dev/sdXX for SCSI disk partition). +.SH OPTIONS +.TP +.I -a +This is provided for backwards compatibility only. It does the same +thing as the +.I -p +option described below; it is recommended to use +.I -p +option when possible. +.TP +.I -f +Force checking even if the file system seems clean. +.TP +.I -p +Automatically repair ("preen") the file system without any questions. +.TP +.I -r +This option does nothing at all; it is provided only for backwards +compatibility. +.TP +.I -v +Verbose mode. +.TP +.I -y +Assume an answer of ``yes'' to all questions; allows +.B reiserfsck +to be used non-interactively. +.\" .SH AUTHOR +.\" This version of +.\" .B reiserfsck +.\" has been written by Hans Reiser <reiser@idiom.com>. +.SH BUGS +Not known yet. +Please, report bugs to Hans Reiser <reiser@idiom.com>. 
+.SH AVAILABILITY +.B reiserfsck +sources are available for anonymous ftp from namesys.botik.ru +in /pub/linux+reiserfs/reiserfs-utils.tgz +.SH SEE ALSO +.BR mkreiserfs (8) diff -u -r --new-file linux/fs/reiserfs/utils/fsck/segments.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/segments.c --- linux/fs/reiserfs/utils/fsck/segments.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/segments.c Sun May 14 23:37:11 2000 @@ -0,0 +1,223 @@ +/* + * Copyright 1998 Hans Reiser + */ +/*#include <stdio.h> +#include <string.h>*/ +/*#include <asm/bitops.h> +#include "../include/reiserfs_fs.h" +#include "../include/reiserfs_fs_sb.h" +#include "../include/reiserfslib.h"*/ +#include "fsck.h" + + +/* there is a situation, when we overwrite contents of unformatted + node with direct item. One unformatted node can be overwritten + several times by direct items */ +/* +struct overwritten_unfm_segment { + int ous_begin; + int ous_end; + struct overwritten_unfm_segment * ous_next; +}; +*/ +struct overwritten_unfm { + unsigned long ou_unfm_ptr; /* block number of unfm node */ + unsigned long ou_dir_id; + unsigned long ou_objectid; /* key corresponding to an unfm node */ + unsigned long ou_offset; + + struct overwritten_unfm_segment * ou_segments; /* list of segmens, than have been overwritten in ths unfm node */ +}; + +struct overwritten_unfm ** g_overwritten_unfms; +int g_overwritten_unfms_amount; /* number of unformatted nodes, which contain direct items */ + + +/* adds segment to the single linked list of segments sorted by begin + field. 
Retuns pointer to first element of list */ +static struct overwritten_unfm_segment * add_segment (struct overwritten_unfm_segment * first, int begin, int end) +{ + struct overwritten_unfm_segment * new, * next, * prev; + + new = reiserfs_kmalloc (sizeof (struct overwritten_unfm_segment)); + new->ous_begin = begin; + new->ous_end = end; + new->ous_next = 0; + + next = first; + prev = 0; + while (next) { + if (next->ous_begin > begin) + break; + prev = next; + next = next->ous_next; + } + + if (prev == 0) { + /* insert into head of list */ + first = new; + } else { + prev->ous_next = new; + } + new->ous_next = next; + return first; +} + + +/* input parameter + `list_head` - first element of overlapping segments sorted by left edge + `unoverwritten_segment` - returned by previous call of get_unoverwritten_segment or (-2,-2) if called first time + */ +/* returns + 1 and segment unoverwritten by elements of list `list_head` + 0 if there isno such segment + */ +int get_unoverwritten_segment (struct overwritten_unfm_segment * list_head, struct overwritten_unfm_segment * unoverwritten_segment) +{ + int end; + + /* look for segment, which has begin field greater than end of previous interval */ + while (list_head->ous_begin <= unoverwritten_segment->ous_end) { + list_head = list_head->ous_next; + } + /* look for the end of the continuous region covered by otrezkami */ + end = list_head->ous_end; + while (list_head->ous_next) { + if (list_head->ous_next->ous_begin > end + 1) + /* intreval found */ + break; + if (list_head->ous_next->ous_end > end) + end = list_head->ous_next->ous_end; + list_head = list_head->ous_next; + } + /* ok, between segment and segment->next we have an interval (segment->next != 0) */ + if (list_head->ous_next != 0) { + unoverwritten_segment->ous_begin = end + 1; + unoverwritten_segment->ous_end = list_head->ous_next->ous_begin - 1; + return 1; + } + return 0; +} + + +void print_segments (struct overwritten_unfm_segment * list_head) +{ + struct 
overwritten_unfm_segment * cur; + + cur = list_head; + while (cur) { + printf ("%s%d %d%s", cur == list_head ? "(" : "", cur->ous_begin, cur->ous_end, cur->ous_next ? ", " : ")\n"); + cur = cur->ous_next; + } +} + + +/* this prepare list of segments to extracting of unoverwritten segments */ +struct overwritten_unfm_segment * find_overwritten_unfm (unsigned long unfm, int length, struct overwritten_unfm_segment * segment_to_init) +{ + int i; + + for (i = 0; i < g_overwritten_unfms_amount && g_overwritten_unfms[i] != 0; i ++) + if (g_overwritten_unfms[i]->ou_unfm_ptr == unfm) { + if (g_overwritten_unfms[i]->ou_segments == 0) + die ("find_overwritten_unfm: no segment found"); + g_overwritten_unfms[i]->ou_segments = add_segment (g_overwritten_unfms[i]->ou_segments, -1, -1); + add_segment (g_overwritten_unfms[i]->ou_segments, length, length); + segment_to_init->ous_begin = -2; + segment_to_init->ous_end = -2; + return g_overwritten_unfms[i]->ou_segments; + } + return 0; +} + +struct overwritten_unfm * look_for_overwritten_unfm (__u32 unfm) +{ + int i; + + for (i = 0; i < g_overwritten_unfms_amount && g_overwritten_unfms[i] != 0; i ++) + if (g_overwritten_unfms[i]->ou_unfm_ptr == unfm) + return g_overwritten_unfms[i]; + return 0; +} + +#define GROW_BY 10 +struct overwritten_unfm * add_overwritten_unfm (unsigned long unfm, struct item_head * direct_ih) +{ + int i; + + for (i = 0; i < g_overwritten_unfms_amount && g_overwritten_unfms[i] != 0; i ++) { + if (g_overwritten_unfms[i]->ou_unfm_ptr == unfm) + return g_overwritten_unfms[i]; + } + + if (i == g_overwritten_unfms_amount) { + g_overwritten_unfms = expandmem (g_overwritten_unfms, sizeof (struct overwritten_unfm *) * i, + sizeof (struct overwritten_unfm *) * GROW_BY); + g_overwritten_unfms_amount += GROW_BY; + } + g_overwritten_unfms[i] = reiserfs_kmalloc (sizeof (struct overwritten_unfm)); + g_overwritten_unfms[i]->ou_unfm_ptr = unfm; + g_overwritten_unfms[i]->ou_dir_id = direct_ih->ih_key.k_dir_id; + 
g_overwritten_unfms[i]->ou_objectid = direct_ih->ih_key.k_objectid; + g_overwritten_unfms[i]->ou_offset = direct_ih->ih_key.k_offset - (direct_ih->ih_key.k_offset - 1) % g_sb.s_blocksize; + return g_overwritten_unfms[i]; +} + + +void save_unfm_overwriting (unsigned long unfm, struct item_head * direct_ih) +{ + struct overwritten_unfm * ov_unfm; + + /* add new overwritten unfm or return existing one */ + ov_unfm = add_overwritten_unfm (unfm, direct_ih); + ov_unfm->ou_segments = add_segment (ov_unfm->ou_segments, (direct_ih->ih_key.k_offset - 1) % g_sb.s_blocksize, + (direct_ih->ih_key.k_offset - 1) % g_sb.s_blocksize + direct_ih->ih_item_len - 1); +} + + +void free_overwritten_unfms (void) +{ + int i; + + for (i = 0; i < g_overwritten_unfms_amount && g_overwritten_unfms[i]; i ++) { + /* free all segments */ + while (g_overwritten_unfms[i]->ou_segments) { + struct overwritten_unfm_segment * tmp; + + tmp = g_overwritten_unfms[i]->ou_segments->ous_next; + freemem (g_overwritten_unfms[i]->ou_segments); + g_overwritten_unfms[i]->ou_segments = tmp; + } + /* free struct overwritten_unfm */ + freemem (g_overwritten_unfms[i]); + } + + /* free array of pointers to overwritten unfms */ + if (g_overwritten_unfms) + freemem (g_overwritten_unfms); +} + +#if 0 +static int formatted_pointed_by_indirect; +static __u32 * stored; +static int length; +void mark_formatted_pointed_by_indirect (__u32 block) +{ + if (stored == 0 || length == formatted_pointed_by_indirect) { + stored = expandmem (stored, sizeof (__u32) * length, sizeof (__u32) * 1000); + length += 1000; + } + stored [formatted_pointed_by_indirect ++] = block; +} + +int is_formatted_pointed_by_indirect (__u32 block) +{ + int i; + + for (i = 0; i < formatted_pointed_by_indirect; i ++) + if (stored [i] == block) + return 1; + + return 0; +} +#endif diff -u -r --new-file linux/fs/reiserfs/utils/fsck/semantic.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/semantic.c --- linux/fs/reiserfs/utils/fsck/semantic.c Thu Jan 1 03:00:00 
1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/semantic.c Sun May 14 23:37:11 2000 @@ -0,0 +1,451 @@ +/* + * Copyright 1996-1999 Hans Reiser + */ +#include "fsck.h" +#include <time.h> + + +/*#define ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))*/ + +/* path is path to stat data */ +static void check_regular_file (struct path * path, struct stat_data * sd) +{ + int mark_passed_items; + struct key key; + unsigned long size; + struct buffer_head * bh = PATH_PLAST_BUFFER (path);/* contains stat data */ + struct item_head * ih = PATH_PITEM_HEAD (path);/* stat data item */ + + if (sd->sd_nlink == 0) { + +/* print_how_far (&stat_datas, get_event (STAT_DATA_ITEMS));*/ + if ((sd->sd_mode & S_IFMT) == S_IFREG) + add_event (REGULAR_FILES); + else if ((sd->sd_mode & S_IFMT) == S_IFLNK) + add_event (SYMLINKS); + else + add_event (OTHERS); + sd->sd_nlink ++; + mark_item_accessed (ih, bh); + mark_objectid_as_used (ih->ih_key.k_objectid); + + copy_key (&key, &(ih->ih_key)); + if (are_file_items_correct (&key, &size, mark_passed_items = 1, path, &sd) != 1) { + /* unpassed items will be deleted in pass 4 as they left unaccessed */ + add_event (INCORRECT_REGULAR_FILES); + } + /* are_file_items_correct could perform indirect_to_direct, bh could be changed */ + bh = PATH_PLAST_BUFFER (path); + /* set correct size */ +#ifdef DONT_USE_FREE_SPACE + if (ROUND_UP (sd->sd_size, g_sb.s_blocksize) != size) { +#else + if (sd->sd_size != size) { +#endif + add_event (FIXED_SIZE_FILES); + sd->sd_size = size; + mark_buffer_dirty (bh, 0); + } + /* set first direct byte field of stat data (if it is set incorrect) */ + if (size == 0 || KEY_IS_INDIRECT_KEY(&key)) { + /* there are no direct items in file */ + if (sd->u.sd_first_direct_byte != NO_BYTES_IN_DIRECT_ITEM) { + sd->u.sd_first_direct_byte = NO_BYTES_IN_DIRECT_ITEM; + mark_buffer_dirty (bh, 0); + } + } else { + /* there is at least one direct item */ + if (sd->u.sd_first_direct_byte != k_offset (&key) - (k_offset (&key) % g_sb.s_blocksize - 1)) { 
+ sd->u.sd_first_direct_byte = k_offset (&key) - (k_offset (&key) % g_sb.s_blocksize - 1); + mark_buffer_dirty (bh, 0); + } + } + } else { + if (is_item_accessed (ih) == 0) + die ("check_regular_file: stat data item must be accessed"); + sd->sd_nlink ++; + mark_buffer_dirty (bh, 0); + } +} + + +static int is_rootdir_key (struct key * key) +{ + if (comp_keys (key, &root_key)) + return 0; + return 1; +} + +static int is_rootdir_entry_key (struct key * key) +{ + if (comp_short_keys (key, &root_key)) + return 0; + return 1; +} + + +/* when root direcotry can not be found */ +static void create_root_directory (struct path * path) +{ + struct item_head ih; + struct stat_data sd; + + /* insert stat data item */ + copy_key (&(ih.ih_key), &root_key); + ih.ih_item_len = SD_SIZE; + set_ih_free_space (&ih, MAX_US_INT); + mark_item_unaccessed (&ih); + + sd.sd_mode = S_IFDIR + 0755; + sd.sd_nlink = 0; + sd.sd_uid = 0; + sd.sd_gid = 0; + sd.sd_size = EMPTY_DIR_SIZE; + sd.sd_atime = sd.sd_ctime = sd.sd_mtime = time (NULL); + sd.u.sd_first_direct_byte = MAX_UL_INT; + + reiserfsck_insert_item (path, &ih, (char *)(&sd)); +} + + +static void paste_dot_and_dot_dot (struct path * path) +{ + char dir[EMPTY_DIR_SIZE]; + struct reiserfs_de_head * deh; + struct key key; + + copy_key (&key, &root_key); + + deh = (struct reiserfs_de_head *)dir; + deh[0].deh_offset = cpu_to_le64 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (root_key.k_dir_id); + deh[0].deh_objectid = cpu_to_le32 (root_key.k_objectid); + deh[0].deh_state = 0; + set_bit (DEH_Visible, &(deh[0].deh_state)); + dir[DEH_SIZE] = '.'; + dir[DEH_SIZE + 1] = dir[DEH_SIZE + 2] = dir[DEH_SIZE + 3] = 0; + reiserfsck_paste_into_item (path, dir, DEH_SIZE + 4); + + set_k_offset (&key, DOT_DOT_OFFSET); + set_k_type (&key, TYPE_DIRENTRY); + if (usearch_by_entry_key (&g_sb, &key, path) == ENTRY_FOUND) { + reiserfs_warning ("paste_dot_and_dot_dot: \"..\" found\n"); + pathrelse (path); + return; + } + deh[0].deh_offset = cpu_to_le64 
(DOT_DOT_OFFSET); + deh[0].deh_dir_id = 0; + deh[0].deh_objectid = cpu_to_le32 (root_key.k_dir_id); + deh[0].deh_state = 0; + set_bit (DEH_Visible, &(deh[0].deh_state)); + dir[DEH_SIZE] = dir[DEH_SIZE + 1] = '.'; + dir[DEH_SIZE + 2] = dir[DEH_SIZE + 3] = 0; + + reiserfsck_paste_into_item (path, dir, DEH_SIZE + 4); +} + + +/* used only to insert empty dir item for root directory */ +static void insert_dot_and_dot_dot (struct path * path) +{ + struct item_head ih; + char dir[EMPTY_DIR_SIZE]; + + ih.ih_key.k_dir_id = cpu_to_le32 (root_key.k_dir_id); + ih.ih_key.k_objectid = cpu_to_le32 (root_key.k_objectid); + set_k_offset (&(ih.ih_key), DOT_OFFSET); + set_k_type (&(ih.ih_key), TYPE_DIRENTRY); + ih.ih_item_len = cpu_to_le16 (EMPTY_DIR_SIZE); + ih.u.ih_entry_count = cpu_to_le16 (2); + mark_item_unaccessed (&ih); + + make_empty_dir_item (dir, root_key.k_dir_id, root_key.k_objectid, + 0, root_key.k_dir_id); + reiserfsck_insert_item (path, &ih, dir); +} + + +/* returns buffer, containing found directory item.*/ +static char * get_next_directory_item (struct path * path, struct key * key, struct key * parent, struct item_head * ih) +{ + char * dir_item; + struct key * rdkey; + struct buffer_head * bh; + struct reiserfs_de_head * deh; + int i; + int retval; + + if ((retval = usearch_by_entry_key (&g_sb, key, path)) != ENTRY_FOUND) { + if (k_offset (key) != DOT_OFFSET) + die ("get_next_directory_item: entry not found"); + + /* first directory item not found */ + if (is_rootdir_entry_key (key)) { + /* add "." and ".." to the root directory */ + if (retval == ENTRY_NOT_FOUND) + paste_dot_and_dot_dot (path); + else if (retval == DIRECTORY_NOT_FOUND) + insert_dot_and_dot_dot (path); + else + die ("get_next_directory_item: invalid return value"); + usearch_by_entry_key (&g_sb, key, path); + } else { + /* it is ok for directories but the root one that "." 
is not found */ + pathrelse (path); + return 0; + } + } + /* leaf containing directory item */ + bh = PATH_PLAST_BUFFER (path); + + memcpy (ih, PATH_PITEM_HEAD (path), IH_SIZE); + + /* make sure, that ".." exists as well */ + if (k_offset (key) == DOT_OFFSET) { + if (I_ENTRY_COUNT (ih) < 2) { + pathrelse (path); + return 0; + } + deh = B_I_DEH (bh, ih) + 1; + if (I_DEH_N_ENTRY_FILE_NAME_LENGTH (ih, deh, 1) != strlen ("..") || + memcmp ("..", B_I_E_NAME (bh, ih, 1), 2)) { + printf ("******get_next_directory_item: \"..\" not found***********\n"); + pathrelse (path); + return 0; + } + } + + deh = B_I_DEH (bh, ih); + + /* mark hidden entries as visible, reset ".." correctly */ + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + if (de_hidden (deh)) { + if (opt_verbose) + reiserfs_warning ("\nget_next_directory_item: hidden entry %d\n", i); + + mark_de_visible (deh); + mark_buffer_dirty (bh, 0); + } + if (deh->deh_offset == DOT_DOT_OFFSET) { + /* set ".." so that it points to the correct parent directory */ + if (comp_short_keys (&(deh->deh_dir_id), parent) && + deh->deh_objectid != REISERFS_ROOT_PARENT_OBJECTID) { + if (opt_verbose) + reiserfs_warning ("\nget_next_directory_item: \"..\" fixed\n"); + deh->deh_dir_id = key->k_dir_id; + deh->deh_objectid = key->k_objectid; + mark_buffer_dirty (bh, 0); + } + } + } + + /* copy directory item to the temporary buffer */ + dir_item = reiserfs_kmalloc (ih->ih_item_len); + memcpy (dir_item, B_I_PITEM (bh, ih), ih->ih_item_len); + + /* next item key */ + if (PATH_LAST_POSITION (path) == (B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1) && + (rdkey = uget_rkey (path))) + copy_key (key, rdkey); + else { + key->k_dir_id = 0; + key->k_objectid = 0; + } + + mark_item_accessed (PATH_PITEM_HEAD (path), PATH_PLAST_BUFFER (path)); + return dir_item; +} + + +/* deh - le, key, entry_key, ih - cpu */ +static void get_object_key (struct reiserfs_de_head * deh, struct key * key, struct key * entry_key, struct item_head * ih) +{ + /* key of 
object pointed by direntry (deh) */ + key->k_dir_id = le32_to_cpu (deh->deh_dir_id); + key->k_objectid = le32_to_cpu (deh->deh_objectid); + set_k_offset_cpu (key, SD_OFFSET); + set_key_stat_data_cpu (key); + + /* key of direntry */ + entry_key->k_dir_id = ih->ih_key.k_dir_id; + entry_key->k_objectid = ih->ih_key.k_objectid; + set_k_offset_cpu (entry_key, deh->deh_offset); + set_key_direntry_cpu (entry_key); +} + + + +static void reiserfsck_cut_entry (struct key * key) +{ + struct path path; + + if (usearch_by_entry_key (&g_sb, key, &path) != ENTRY_FOUND || k_offset_cpu (key) == DOT_OFFSET) + die ("reiserfsck_cut_entry: entry not found"); + + if (I_ENTRY_COUNT (PATH_PITEM_HEAD (&path)) == 1) + reiserfsck_delete_item (&path); + else { + struct reiserfs_de_head * deh = B_I_DEH (PATH_PLAST_BUFFER (&path), PATH_PITEM_HEAD (&path)) + path.pos_in_item; + reiserfsck_cut_from_item (&path, -(DEH_SIZE + I_DEH_N_ENTRY_LENGTH (PATH_PITEM_HEAD (&path), deh, path.pos_in_item))); + } +} + + + +/* check recursively the semantic tree. Returns 0 if entry points to + good object, and -1 or -2 if this entry must be deleted (stat data + not found or directory does not have any items). Hard links are not + allowed, but if directory rename has been interrupted by the system + crash, it is possible, that fsck will find two entries (not "..") + pointing to the same directory. In this case fsck keeps only the + first one. */ +#define OK 0 +#define STAT_DATA_NOT_FOUND -1 +#define DIRECTORY_HAS_NO_ITEMS -2 + +static __u32 stat_datas = 0; + + + +int check_semantic_tree (struct key * key, struct key * parent, int is_dot_dot) +{ + struct path path; + struct stat_data * sd; + + if (!KEY_IS_STAT_DATA_KEY (key)) + die ("check_semantic_tree: key must be key of a stat data"); + + /* look for stat data of an object */ + if (usearch_by_key (&g_sb, key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) == ITEM_NOT_FOUND) { + if (is_rootdir_key (key)) { + /* stat data of the root directory not found. 
Make it */ + create_root_directory (&path); + usearch_by_key (&g_sb, key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys); + } else { + pathrelse (&path); + return STAT_DATA_NOT_FOUND; + } + } + + sd = B_N_STAT_DATA (PATH_PLAST_BUFFER (&path), PATH_LAST_POSITION (&path)); + if ((sd->sd_nlink == 0) && ( opt_fsck == 0 )) + print_how_far (&stat_datas, get_event (STAT_DATA_ITEMS)); + + if ((sd->sd_mode & S_IFMT) != S_IFDIR) { + /* object is not a directory (regular, symlink, device file) */ + /*if ((sd->sd_mode & S_IFMT) == S_IFLNK) + printf ("Symlink found\n");*/ + + check_regular_file (&path, sd); + pathrelse (&path); + return OK; + } + + /* object is directory */ + sd->sd_nlink ++; + mark_buffer_dirty (PATH_PLAST_BUFFER (&path), 0); + if (sd->sd_nlink == 1) { + char * dir_item; + struct item_head ih; + struct key item_key, entry_key, object_key; + unsigned long dir_size = 0; + + /*print_how_far (&stat_datas, get_event (STAT_DATA_ITEMS));*/ + + if (key->k_objectid == REISERFS_ROOT_OBJECTID) + sd->sd_nlink ++; + + add_event (DIRECTORIES); + copy_key (&item_key, key); + item_key.k_offset = DOT_OFFSET; + item_key.k_uniqueness = DIRENTRY_UNIQUENESS; + pathrelse (&path); + while ((dir_item = get_next_directory_item (&path, &item_key, parent, &ih)) != 0) { + /* dir_item is copy of the item in separately allocated memory */ + int i; + int retval; + struct reiserfs_de_head * deh = (struct reiserfs_de_head *)dir_item + path.pos_in_item; + +/*&&&&&&&&&&&&&&&*/ + if (dir_size == 0) { + if (deh->deh_offset != DOT_OFFSET || (deh + 1)->deh_offset != DOT_DOT_OFFSET) + die ("check_semantic_tree: Directory without \".\" or \"..\""); + } +/*&&&&&&&&&&&&&&&*/ + + for (i = path.pos_in_item; i < I_ENTRY_COUNT (&ih); i ++, deh ++) { + get_object_key (deh, &object_key, &entry_key, &ih); + retval = check_semantic_tree (&object_key, key, + (deh->deh_offset == DOT_OFFSET ||deh->deh_offset == DOT_DOT_OFFSET) ? 
1 : 0); + if (retval != OK) { + if (entry_key.k_offset == DOT_DOT_OFFSET && object_key.k_objectid == REISERFS_ROOT_PARENT_OBJECTID) { + /* ".." of root directory can not be found */ + if (retval != STAT_DATA_NOT_FOUND) + die ("check_semantic_tree: stat data of parent directory of root directory found"); + dir_size += DEH_SIZE + strlen (".."); + continue; + } + add_event (DELETED_ENTRIES); + reiserfsck_cut_entry (&entry_key); + } else { + /* OK */ + dir_size += DEH_SIZE + I_DEH_N_ENTRY_LENGTH (&ih, deh, i); + } + } + + freemem (dir_item); + + if (comp_short_keys (&item_key, key) != KEYS_IDENTICAL) { + pathrelse (&path); + break; + } + pathrelse (&path); + } + + if (dir_size == 0) + return DIRECTORY_HAS_NO_ITEMS; + + if (usearch_by_key (&g_sb, key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) != ITEM_FOUND) + die ("check_semantic_tree: stat data not found"); + + mark_objectid_as_used (PATH_PITEM_HEAD (&path)->ih_key.k_objectid); + + if (dir_size != (sd = B_N_STAT_DATA (PATH_PLAST_BUFFER (&path), PATH_LAST_POSITION (&path)))->sd_size) { + add_event (FIXED_SIZE_DIRECTORIES); + sd->sd_size = dir_size; + } + /* stat data of a directory is accessed */ + mark_item_accessed (PATH_PITEM_HEAD (&path), PATH_PLAST_BUFFER (&path)); + } else { + /* we have accessed directory stat data not for the first time. we + can come here only from "." or "..". Other names must be removed + to avoid creation of hard links */ + if (!is_dot_dot) { + sd->sd_nlink --; + if (opt_verbose) + reiserfs_warning ("\ncheck_semantic_tree: more than one name (neither \".\" nor \"..\") of a directory. 
Removed\n"); + pathrelse (&path); + return STAT_DATA_NOT_FOUND; + } + } + pathrelse (&path); + + + return OK; +} + + +struct key g_root_directory_key = {REISERFS_ROOT_PARENT_OBJECTID, REISERFS_ROOT_OBJECTID, 0, 0}; +struct key g_parent_root_directory_key = {0, REISERFS_ROOT_PARENT_OBJECTID, 0, 0}; + +void semantic_pass () +{ + + if ( opt_fsck == 0 ) + fprintf (stderr, "Pass 3 (semantic) - "); + check_semantic_tree (&g_root_directory_key, &g_parent_root_directory_key, 0); + if ( opt_fsck == 0 ) + printf ("\n"); +} + + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/ubitmap.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ubitmap.c --- linux/fs/reiserfs/utils/fsck/ubitmap.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ubitmap.c Sun May 14 23:37:12 2000 @@ -0,0 +1,411 @@ +/* + * Copyright 1996-1999 Hans Reiser + */ + +#include "fsck.h" +#include "reiserfs.h" + + +/* g_disk_bitmap initially contains copy of disk bitmaps + (cautious version of it); + + g_new_bitmap initially has marked only super block, bitmap blocks + and bits after the end of bitmap + + in pass 1 we go through g_disk_bitmap. + + If block does not look like formatted node, we skip it. + + If block contains internal node, put 0 in g_disk_bitmap if block is + not used in new tree yet. + + If block contains leaf and is used already (by an indirect item + handled already to this time) save all items. They will be inserted + into tree after pass 1. + + If block looking like leaf is not used in the new tree, try to + insert in into tree. If it is not possible, mark block in + g_uninsertable_leaf_bitmap. Blocks marked in this bitmap will be inserted into tree in pass 2. They can not be + + This means, that in pass 1 when we have + found block containing the internal nodes we mark it in + g_disk_bitmap as free (reiserfs_free_internal_block). 
When block + gets into new tree it is marked in g_new_bitmap (mark_block_used) + When collecting resources for do_balance, we mark new blocks with + mark_block_used. After do_balance we unmark unused new blocks in + g_new_bitmap (bitmap.c:/reiserfs_free_block) + + Allocating of new blocks: look for 0 bit in g_disk_bitmap + (find_zero_bit_in_bitmap), make sure, that g_new_bitmap contains 0 + at the corresponding bit (is_block_used). + + */ + + + +int was_block_used (unsigned long block) +{ + int i, j; + + if (block >= SB_BLOCK_COUNT (&g_sb)) + die ("was_block_used: %d is too big (%d)\n", block, SB_BLOCK_COUNT (&g_sb)); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + return test_bit (j, g_disk_bitmap[i]); +} + + +/* is blocks used (marked by 1 in new bitmap) in the tree which is being built (as leaf, internal, + bitmap, or unformatted node) */ +int is_block_used (unsigned long block) +{ + int i, j; + + if(g_new_bitmap == 0) + return 0; + if (block >= SB_BLOCK_COUNT (&g_sb)) { + printf ("is_block_used: %ld is too big (%d)\n", block, SB_BLOCK_COUNT (&g_sb)); + return 1; + } + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + return test_bit (j, g_new_bitmap[i]); +} + + +void mark_block_used (unsigned long block) +{ + int i, j; + + if (is_block_used (block)) + die ("mark_block_used: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + set_bit (j, g_new_bitmap[i]); + SB_FREE_BLOCKS (&g_sb)--; +} + +/*%%%%%%%%%%%%%%%%%%%%%%*/ +int is_block_formatted (unsigned long block) +{ + int i, j; + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + return test_bit (j, g_formatted[i]); +} +int is_block_unformatted (unsigned long block) +{ + int i, j; + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + return test_bit (j, g_unformatted[i]); +} +void mark_block_formatted (unsigned long block) +{ + int i, j; + + if 
(is_block_formatted (block) || is_block_unformatted (block)) + die ("mark_block_formatted: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + set_bit (j, g_formatted[i]); +} +void mark_block_unformatted (unsigned long block) +{ + int i, j; + + if (is_block_formatted (block) || is_block_unformatted (block)) + die ("mark_block_unformatted: (%lu) used already", block); + + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + set_bit (j, g_unformatted[i]); +} +void unmark_block_formatted (unsigned long block) +{ + int i, j; + + if (!is_block_formatted (block) || is_block_unformatted (block)) + die ("unmark_block_formatted: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + clear_bit (j, g_formatted[i]); +} +void unmark_block_unformatted (unsigned long block) +{ + int i, j; + + if (is_block_formatted (block) || !is_block_unformatted (block)) + die ("unmark_block_unformatted: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + clear_bit (j, g_unformatted[i]); +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +/* uninsertable block is marked by bit clearing */ +void mark_block_uninsertable (unsigned long block) +{ + int i, j; + + if (is_block_used (block)) + die ("mark_block_uninsertable: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + clear_bit (j, g_uninsertable_leaf_bitmap[i]); +} + +int is_block_uninsertable (unsigned long block) +{ + int i, j; + + if (is_block_used (block)) + die ("is_block_uninsertable: (%lu) used already", block); + + i = block / (g_sb.s_blocksize * 8); + j = block % (g_sb.s_blocksize * 8); + return !test_bit (j, g_uninsertable_leaf_bitmap[i]); +} + +static inline void get_bit_address (struct super_block * s, unsigned long block, int * bmap_nr, int * offset) +{ + *bmap_nr = block / (s->s_blocksize 
<< 3); + *offset = block % (s->s_blocksize << 3); + return; +} + +static inline int find_prev_zero_bit (void * addr, int offset) +{ + char * start; /* byte pointer to starting byte of search */ + int bit_offset; /* bit offset within starting byte of starting point */ + char mask; + + start = (char *)addr + (offset >> 3); + bit_offset = (offset % 8); + + mask = (unsigned int)0xff >> (7 - bit_offset); + while (start >= (char *)addr) { + if ((*start & mask) != mask) { + /* there is at least one 0 bit in current byte */ + for (; bit_offset >= 0; bit_offset --) { + if (!((1 << bit_offset) & *start)) + return ((start - (char *)addr) << 3) + bit_offset; + } + die ("find_prev_zero_bit: must be at least 1 zero bit"); + } + bit_offset = 7; + mask = (unsigned int)0xff; + start --; + } + /* there is no zero bit when we go from offset to the left up to addr */ + return -1; + +} + + +/* beginning from offset-th bit in bmap_nr-th bitmap block, + find_forward finds the closest zero bit. It returns 1 and zero + bit address (bitmap, offset) if zero bit found or 1 if there is no + zero bits in forward direction */ +static int find_forward (struct super_block * s, int * bmap_nr, int * offset) +{ + int i, j; + struct buffer_head * bh; + + for (i = *bmap_nr; i < SB_BMAP_NR (s); i ++, *offset = 0) { + /* get corresponding bitmap block */ + bh = SB_AP_BITMAP (s)[i];/*g_disk_bitmap[i];*/ + while (*offset < (s->s_blocksize << 3)) { + j = find_next_zero_bit ((unsigned long *)bh->b_data, s->s_blocksize << 3, *offset); + if (j < (s->s_blocksize << 3)) { + *bmap_nr = i; + *offset = j; + + /* we found free block in disk bitmap, make sure, that it is + not used in new built tree yet */ + if (is_block_used (i * (s->s_blocksize << 3) + j)) { + (*offset) ++; + continue; + } + return 1; + } + break; /* while */ + } + } /* for */ + + /* zero bit not found */ + return 0; +} + + +/* this does the same as find_forward does, but in backward direction */ +static int find_backward (struct super_block * s, 
int * bmap_nr, int * offset) +{ + int i, j; + struct buffer_head * bh; + + for (i = *bmap_nr; i > -1; i --, *offset = (s->s_blocksize << 3) - 1) { + /* get corresponding bitmap block */ + bh = SB_AP_BITMAP (s)[i];/*g_disk_bitmap[i];*/ + + /* at first we start from position, in next bitmap block we start from 0th position */ + while (*offset > -1) { + j = find_prev_zero_bit ((unsigned long *)bh->b_data, *offset); + if (j != -1) { + *bmap_nr = i; + *offset = j; + + /* we found free block in disk bitmap, make sure, that it is not used in new built tree yet */ + if (is_block_used (i * (s->s_blocksize << 3) + j)) { + (*offset) --; + continue; + } + return 1; + } + break; /* from while */ + } + + /* in previous bitmap block we start from the end */ +/* *offset = (s->s_blocksize << 3) - 1;*/ + } /* for */ + + /* zero bit not found */ + return 0; +} + + +static unsigned long find_zero_bit_in_bitmap (struct super_block * s, unsigned long search_start) +{ + int bmap_nr, offset; + + /* get bit location (bitmap number and bit offset) of search_start block */ + get_bit_address (s, search_start, &bmap_nr, &offset); + + /* first we are going to the right (as elevator_direction requires) */ + if (find_forward (s, &bmap_nr, &offset) == 0) { + /* there wasn't a free block with number greater than our + starting point, so we are going to do find_backward */ + get_bit_address (s, search_start, &bmap_nr, &offset); + if (find_backward (s, &bmap_nr, &offset) == 0) + return 0; + } + + + /* ok, mark block in new bitmap */ + mark_block_used (bmap_nr * (s->s_blocksize << 3) + offset); + return (bmap_nr * (s->s_blocksize << 3)) + offset; +} + + +/* mark block free in bitmap we use to build the tree */ +void reiserfs_free_internal_block (struct super_block * s, unsigned long block) +{ + int i, j; + + i = block / (s->s_blocksize * 8); + j = block % (s->s_blocksize * 8); + + if (test_bit (j, SB_AP_BITMAP (s)[i]->b_data) == 0) + die ("reiserfs_free_internal_block: Block %lu is free", block); + + 
clear_bit (j, SB_AP_BITMAP (s)[i]->b_data);
+  g_old_rs->s_free_blocks ++;
+}
+
+
+/* scan the whole device for formatted internal nodes not yet used by the
+** new tree and release up to 'to_free' of them so block allocation can
+** proceed.  Dies if nothing at all could be freed. */
+static void try_to_free_unused_internal_blocks (int to_free)
+{
+  int i, j, k;
+  int freed = 0;
+  struct buffer_head * bh;
+  int block;
+
+  printf ("\nforce_freeing: Trying to find free internal nodes block..\n");
+  for (i = 0; i < SB_BMAP_NR (&g_sb); i ++)
+    for (j = 0; j < g_sb.s_blocksize; j ++) {
+      if (i * g_sb.s_blocksize * 8 + j * 8 == SB_BLOCK_COUNT (&g_sb))
+	goto out_of_bitmap;
+      for (k = 0; k < 8; k ++) {
+	block = i * g_sb.s_blocksize * 8 + j * 8 + k;
+	if (is_block_used (block))
+	  continue;
+	bh = bread (g_sb.s_dev, block, g_sb.s_blocksize);
+	if (not_formatted_node (bh->b_data, g_sb.s_blocksize)) {
+	  brelse (bh);
+	  continue;
+	}
+	/* this node is a formatted node. we can free an internal node */
+	if (B_IS_KEYS_LEVEL (bh)) {
+	  reiserfs_free_internal_block (&g_sb, bh->b_blocknr);
+	  freed ++;
+	  if (freed == to_free) {
+	    /* release the buffer BEFORE leaving the loop; the brelse used
+	    ** to stand after the goto and was never reached (leak) */
+	    brelse (bh);
+	    goto out_of_bitmap;
+	  }
+	}
+	brelse (bh);
+      }
+    }
+out_of_bitmap:
+  if (freed == 0)
+    die ("Can not find free blocks on device");
+}
+
+
+/* store 'amount_needed' free block numbers into free_blocknrs[], searching
+** from 'start'.  When the bitmap search fails, unused internal nodes are
+** released and the SAME slot is retried: the retry must not consume a slot
+** (the old 'while (amount_needed --)' decremented on every loop test, so
+** each failed search silently under-allocated by one block). */
+int reiserfs_new_blocknrs (struct super_block * s, unsigned long * free_blocknrs, unsigned long start, int amount_needed, int not_used)
+{
+  while (amount_needed) {
+    *free_blocknrs = find_zero_bit_in_bitmap (s, start);
+    if (*free_blocknrs == 0) {
+      /* this will die if it freed no blocks */
+      try_to_free_unused_internal_blocks (10);
+      continue;
+    }
+    free_blocknrs ++;
+    amount_needed --;
+  }
+
+  return CARRY_ON;
+}
+
+
+/* allocate a brand-new buffer near 'start'; dies if the buffer is already
+** uptodate, since a freshly allocated block must not have cached contents */
+struct buffer_head * reiserfsck_get_new_buffer (unsigned long start)
+{
+  unsigned long blocknr = 0;
+  struct buffer_head * bh;
+
+  reiserfs_new_blocknrs (&g_sb, &blocknr, start, 1, 0);
+
+  bh = getblk (g_sb.s_dev, blocknr, g_sb.s_blocksize);
+  if (buffer_uptodate (bh))
+    die ("reiserfsck_get_new_buffer: found uptodate buffer for new blocknr");
+
+  return bh;
+}
+
+
+/* free block in new bitmap */
+void 
reiserfs_free_block (struct super_block * s, unsigned long block) +{ + int i, j; + + i = block / (s->s_blocksize * 8); + j = block % (s->s_blocksize * 8); + + if (test_bit (j, g_new_bitmap[i]) == 0) + die ("reiserfs_free_block: Block %lu is free", block); + + clear_bit (j, g_new_bitmap[i]); + SB_FREE_BLOCKS (&g_sb)++; + +} + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/ufile.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ufile.c --- linux/fs/reiserfs/utils/fsck/ufile.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ufile.c Sun May 14 23:37:12 2000 @@ -0,0 +1,997 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser + */ +#include "fsck.h" + +#if 0 +static int is_bad_sd (struct item_head * ih, char * item) +{ + struct stat_data * sd = (struct stat_data *)item; + + if (!S_ISDIR (sd->sd_mode) && !S_ISREG(sd->sd_mode) && + !S_ISCHR (sd->sd_mode) && !S_ISBLK(sd->sd_mode) && + !S_ISLNK (sd->sd_mode)) { + reiserfs_warning ("is_bad_sd: \ +stat data item (%h) has sd_mode 0%o. 
Skipped\n", ih, sd->sd_mode); + return 1; + } + if ((sd->sd_first_direct_byte != NO_BYTES_IN_DIRECT_ITEM && + sd->sd_first_direct_byte >= sd->sd_size) || + sd->sd_size > MAX_INT) { + reiserfs_warning ("is_bad_sd: \ +stat data item (%h) has sd_size %d, first direct byte %d\n", ih, sd->sd_size, + sd->sd_first_direct_byte); + return 1; + } + if (sd->sd_nlink > 100) { + reiserfs_warning ("is_bad_sd: \ +stat data item (%h) has sd_nlink %d\n", sd->sd_nlink); + return 1; + } + return 0; +} + + +static int is_bad_directory (struct item_head * ih, char * item) +{ + int i; + int namelen; + struct reiserfs_de_head * deh = (struct reiserfs_de_head *)item; + __u32 prev_offset = 0; + __u16 prev_location = 0xffff; + + for (i = 0; i < I_ENTRY_COUNT (ih); i ++) { + namelen = I_DEH_N_ENTRY_FILE_NAME_LENGTH (ih, deh + i, i); + if (namelen > REISERFS_MAX_NAME_LEN (g_sb.s_blocksize)) { + reiserfs_warning ("is_bad_directory: dir item %h has too long name (%d)\n", ih, namelen); + return 1; + } + if (deh[i].deh_offset <= prev_offset) { + reiserfs_warning ("is_bad_directory: dir item %h has invalid header array \ +(offsets: prev %u, %d-th cur %u)\n", ih, prev_offset, i, deh[i].deh_offset); + return 1; + } + prev_offset = deh[i].deh_offset; + + if (deh[i].deh_location >= prev_location) { + reiserfs_warning ("is_bad_directory: dir item %h has invalid header array \ +(locations: prev %u, %d-th cur %u)\n", ih, prev_location, i, deh[i].deh_location); + return 1; + } + } + + return 0; +} + + +/* change incorrect block adresses by 0. Do not consider such item as incorrect */ +static int is_bad_indirect (struct item_head * ih, char * item) +{ + int i; + + for (i = 0; i < I_UNFM_NUM (ih); i ++) { + __u32 * ind = (__u32 *)item; + + if (ind[i] >= SB_BLOCK_COUNT (&g_sb)) { + /*reiserfs_warning ("is_bad_indirect: block address (%lu) in indirect item. 
Super block block count == %u\n", + ind[i], SB_BLOCK_COUNT (&g_sb));*/ + ind[i] = 0; + continue; + } + if (is_block_used (ind[i])) { + ind[i] = 0; + continue; + } + } + return 0; +} + + +int is_bad_item (struct item_head * ih, char * item) +{ + if (I_IS_STAT_DATA_ITEM (ih)) + return is_bad_sd (ih, item); + + if (I_IS_DIRECTORY_ITEM (ih)) + return is_bad_directory (ih, item); + + if (I_IS_INDIRECT_ITEM (ih)) + return is_bad_indirect (ih, item); + + return 0; +} +#endif /* 0 */ + +int is_bad_item (struct item_head *, char *, int, int); + +/* append item to end of list. Set head if it is 0 */ +void save_item (struct si ** head, struct item_head * ih, char * item) +{ + struct si * si, * cur; + int i; + + if (is_bad_item (ih, item, g_sb.s_blocksize, g_sb.s_dev)) { + return; + } + + if (I_IS_INDIRECT_ITEM (ih)) + for (i = 0; i < I_UNFM_NUM (ih); i ++) { + __u32 * ind = (__u32 *)item; + + if (ind[i] >= SB_BLOCK_COUNT (&g_sb) || + !was_block_used (ind[i]) || + is_block_used (ind[i]) || + is_block_uninsertable (ind[i])) { + ind[i] = 0; + continue; + } + } + + si = reiserfs_kmalloc (sizeof (*si)); + si->si_dnm_data = reiserfs_kmalloc (ih->ih_item_len); + memcpy (&(si->si_ih), ih, IH_SIZE); + memcpy (si->si_dnm_data, item, ih->ih_item_len); + + + if (*head == 0) + *head = si; + else { + cur = *head; + while (cur->si_next) + cur = cur->si_next; + cur->si_next = si; + } + return; +} + + +static struct si * save_and_delete_file_item (struct si * si, struct path * path) +{ + struct buffer_head * bh = PATH_PLAST_BUFFER (path); + struct item_head * ih = PATH_PITEM_HEAD (path); + + save_item (&si, ih, B_I_PITEM (bh, ih)); + + reiserfsck_delete_item (path); + return si; +} + + +static struct si * remove_saved_item (struct si * si) +{ + struct si * tmp = si->si_next; + + freemem (si->si_dnm_data); + freemem (si); + return tmp; +} + + +void put_saved_items_into_tree (struct si * si) +{ + while (si) { + insert_item_separately (&si, &(si->si_ih), si->si_dnm_data); +/* 
reiserfsck_file_write (&(si->si_ih), si->si_dnm_data);*/ + si = remove_saved_item (si); + } +} + + +/* path points to an item or behind last item of the node */ +/* +static int next_item_of_other_object (struct key * key, struct path * path) +{ + struct key * next_key; + + if (PATH_LAST_POSITION (path) < B_NR_ITEMS (PATH_PLAST_BUFFER (path))) + next_key = B_N_PKEY (PATH_PLAST_BUFFER (path), PATH_LAST_POSITION (path)); + else + next_key = get_right_dkey (path); + + if (next_key == 0 || comp_short_keys (key, next_key) != KEYS_IDENTICAL) + return YES; + return NO; +} +*/ + + +static int do_items_have_the_same_type (struct key * key1, struct key * key2) +{ + return (key1->k_uniqueness == key2->k_uniqueness) ? 1 : 0; +} + +static int are_items_in_the_same_node (struct path * path) +{ + return (PATH_LAST_POSITION (path) < B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1) ? 1 : 0; +} + + +static struct key * get_next_key (struct path * path) +{ + if (PATH_LAST_POSITION (path) < B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1) + return B_N_PKEY (PATH_PLAST_BUFFER (path), PATH_LAST_POSITION (path) + 1); + return uget_rkey (path); +} + + +/* whether last unfm pointer must be and can be converted to direct item */ +static int can_indirect_item_be_converted (struct item_head * ih) +{ + unsigned long file_size = ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) - 1; + unsigned long tail_size = g_sb.s_blocksize - get_ih_free_space (ih); + + if (!STORE_TAIL_IN_UNFM (file_size, tail_size, g_sb.s_blocksize) && + I_IS_INDIRECT_ITEM (ih)/* && tail_size <= MAX_DIRECT_ITEM_LEN (g_sb.s_blocksize)*/) + return 1; + return 0; +} + + +int do_make_tails () +{ + return 1;/*SB_MAKE_TAIL_FLAG (&g_sb) == MAKE_TAILS ? 
YES : NO;*/ +} + + +static void cut_last_unfm_pointer (struct path * path, struct item_head * ih) +{ + set_ih_free_space (ih, 0); + if (I_UNFM_NUM (ih) == 1) + reiserfsck_delete_item (path); + else + reiserfsck_cut_from_item (path, -UNFM_P_SIZE); +} + + +static unsigned long indirect_to_direct (struct path * path) +{ + struct buffer_head * bh = PATH_PLAST_BUFFER (path); + struct item_head * ih = PATH_PITEM_HEAD (path); + unsigned long unfm_ptr; + struct buffer_head * unfm_bh = 0; + struct item_head ins_ih; + char * buf; + int len; + unsigned long offset; + + + add_event (INDIRECT_TO_DIRECT); + + unfm_ptr = B_I_POS_UNFM_POINTER (bh, ih, I_UNFM_NUM (ih) - 1); + + + /* direct item to insert */ + ins_ih.ih_key.k_dir_id = ih->ih_key.k_dir_id; + ins_ih.ih_key.k_objectid = ih->ih_key.k_objectid; + ins_ih.ih_key.k_offset = ih->ih_key.k_offset + (I_UNFM_NUM (ih) - 1) * bh->b_size; + offset = ins_ih.ih_key.k_offset; + ins_ih.ih_key.k_uniqueness = TYPE_DIRECT; + ins_ih.ih_item_len = g_sb.s_blocksize - get_ih_free_space (ih); + len = ins_ih.ih_item_len; + set_ih_free_space (&ins_ih, MAX_US_INT); + ins_ih.ih_reserved = 0; + + /* get buffer filled with 0s */ + buf = reiserfs_kmalloc (len); + if (unfm_ptr) { + unfm_bh = bread (bh->b_dev, unfm_ptr, bh->b_size); + memcpy (buf, unfm_bh->b_data, ins_ih.ih_item_len); + brelse (unfm_bh); + } + + + path->pos_in_item = I_UNFM_NUM (ih) - 1; + cut_last_unfm_pointer (path, ih); + + /* insert direct item */ + if (usearch_by_key (&g_sb, &(ins_ih.ih_key), path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) == ITEM_FOUND) + die ("indirect_to_direct: key must be not found"); + reiserfsck_insert_item (path, &ins_ih, (const char *)(buf)); + + + freemem (buf); + + /* put to stat data offset of first byte in direct item */ + return offset; +} + + +/* when it returns, key->k_offset is offset of the last item of file */ +int are_file_items_correct (struct key * key, unsigned long * size, int mark_passed_items, + struct path * path_to_sd, struct 
stat_data ** sd) +{ + struct path path; + int retval; + struct item_head * ih; + struct key * next_key; + + *size = 0; + key->k_offset = 1; + key->k_uniqueness = TYPE_DIRECT; + path.path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + + do { + retval = usearch_by_position (&g_sb, key, &path); + if (retval == BYTE_FOUND && path.pos_in_item != 0) + die ("are_file_items_correct: all bytes we look for must be found at position 0"); + + switch (retval) { + case BYTE_FOUND:/**/ + ih = PATH_PITEM_HEAD (&path); + key->k_uniqueness = ih->ih_key.k_uniqueness; + if (mark_passed_items == 1) { + mark_item_accessed (ih, PATH_PLAST_BUFFER (&path)); + } + next_key = get_next_key (&path); + if (next_key == 0 || comp_short_keys (key, next_key) != KEYS_IDENTICAL || + (!KEY_IS_INDIRECT_KEY (next_key) && !KEY_IS_DIRECT_KEY (next_key))) { + /* next item does not exists or is of another object, therefore all items of file are correct */ + *size = key->k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) - 1; + + if (mark_passed_items == 1 && do_make_tails () == 1 && can_indirect_item_be_converted (ih) == 1) { + struct key sd_key; + unsigned long first_direct_byte; + + first_direct_byte = indirect_to_direct (&path); + /* we have to research stat data of object after converting */ + pathrelse (path_to_sd); + copy_key (&sd_key, key); + sd_key.k_offset = SD_OFFSET; + sd_key.k_uniqueness = SD_UNIQUENESS; + if (usearch_by_key (&g_sb, &(sd_key), path_to_sd, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) != ITEM_FOUND) + die ("are_file_items_correct: stat data not found"); + *sd = B_N_STAT_DATA (PATH_PLAST_BUFFER (path_to_sd), PATH_LAST_POSITION (path_to_sd)); + /* last item of the file is direct item */ + key->k_offset = first_direct_byte; + key->k_uniqueness = TYPE_DIRECT; + } else + pathrelse (&path); + return 1; + } + /* next item is item of this file */ + if ((I_IS_INDIRECT_ITEM (ih) && + ih->ih_key.k_offset + g_sb.s_blocksize * I_UNFM_NUM (ih) != next_key->k_offset) || + (I_IS_DIRECT_ITEM (ih) && 
ih->ih_key.k_offset + ih->ih_item_len != next_key->k_offset)) { + /* next item has incorrect offset (hole or overlapping) */ + *size = key->k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) - 1; + pathrelse (&path); + return 0; + } + if (do_items_have_the_same_type (&(ih->ih_key), next_key) == 1 && are_items_in_the_same_node (&path) == 1) { + /* two indirect items or two direct items in the same leaf */ + *size = key->k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) - 1; + pathrelse (&path); + return 0; + } + /* items are of different types or are in different nodes */ + if (ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) != next_key->k_offset) { + /* indirect item free space is not set properly */ + if (!I_IS_INDIRECT_ITEM (ih) || get_ih_free_space (ih) == 0) + die ("are_file_items_correct: item must be indirect and must have invalid free space (%d)", + get_ih_free_space (ih)); + + set_ih_free_space (ih, 0); + mark_buffer_dirty (PATH_PLAST_BUFFER (&path), 0); + if (ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) != next_key->k_offset) + die ("are_file_items_correct: invalid offset"); + } + /* next item exists */ + key->k_offset = next_key->k_offset; + pathrelse (&path); + break; + + case BYTE_NOT_FOUND: + if (key->k_offset != 1) + die ("are_file_items_correct: byte can be not found only when it is first byte of file"); + pathrelse (&path); + return 0; + + case FILE_NOT_FOUND: + if (key->k_offset != 1) + die ("are_file_items_correct: there is no items of this file, byte 0 found though"); + pathrelse (&path); + return 1; + + case DIRECTORY_FOUND: + pathrelse (&path); + return 0; + } + } while (1); + + die ("are_file_items_correct: code can not reach here"); + return 0; +} + + +/* file must have correct sequence of items and tail must be stored in + unformatted pointer */ +static int make_file_writeable (struct item_head * ih) +{ + struct key key; + struct key * rkey; + struct path path; + struct item_head * path_ih; + struct si * si = 0; + 
unsigned long size; + int mark_passed_items; + int retval; + + copy_key (&key, &(ih->ih_key)); + + if ((retval = are_file_items_correct (&key, &size, mark_passed_items = 0, 0, 0)) == 1) + /* this file looks good (or there is no any items of it) */ + return 1; + + if (retval == -1) { + /* there is an object with this key and it is directory */ + return -1; + } + + /* rewrite file */ + + + /* look for all items of file, store them and delete */ + key.k_offset = 1; + while (1) { + usearch_by_key (&g_sb, &key, &path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys_3); + if (PATH_LAST_POSITION (&path) == B_NR_ITEMS (PATH_PLAST_BUFFER (&path))) { + rkey = uget_rkey (&path); + if (rkey && comp_short_keys (&key, rkey) == KEYS_IDENTICAL) { + /* file continues in the right neighbor */ + copy_key (&key, rkey); + pathrelse (&path); + continue; + } + /* there is no more items of file */ + pathrelse (&path); + break; + } + path_ih = PATH_PITEM_HEAD (&path); + if (comp_short_keys (&key, &(path_ih->ih_key)) != KEYS_IDENTICAL) { + pathrelse (&path); + break; + } + si = save_and_delete_file_item (si, &path); + } + + /* put all items back into tree */ + put_saved_items_into_tree (si); + + add_event (REWRITTEN_FILES); + +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + copy_key (&key, &(ih->ih_key)); + size = 0; + if (are_file_items_correct (&key, &size, mark_passed_items = 0, 0, 0) == 0) { + die ("file still incorrect\n"); + } +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + return 1; + +} + + +/* this inserts __first__ indirect item (having k_offset == 1 and only + one unfm pointer) into tree */ +static int create_first_item_of_file (struct item_head * ih, char * item, struct path * path, int *pos_in_coming_item) +{ + unsigned long unfm_ptr; + struct buffer_head * unbh; + struct item_head indih; + int retval; + + if (ih->ih_key.k_offset > g_sb.s_blocksize) { + /* insert indirect item containing 0 unfm pointer */ + unfm_ptr = 0; + set_ih_free_space (&indih, 0); + retval 
= 0; + } else { + if (I_IS_DIRECT_ITEM (ih)) { + /* copy direct item to new unformatted node. Save information about it */ + + unbh = reiserfsck_get_new_buffer (PATH_PLAST_BUFFER (path)->b_blocknr); + unfm_ptr = unbh->b_blocknr; + +/* this is for check only */ +mark_block_unformatted (unfm_ptr); + memcpy (unbh->b_data + ih->ih_key.k_offset - 1, item, ih->ih_item_len); + + save_unfm_overwriting (unfm_ptr, ih); + + set_ih_free_space (&indih, g_sb.s_blocksize - ih->ih_item_len - (ih->ih_key.k_offset - 1)); + mark_buffer_dirty (unbh, 0); + mark_buffer_uptodate (unbh, 0); + brelse (unbh); + retval = ih->ih_item_len; + } else { + /* take first unformatted pointer from an indirect item */ + unfm_ptr = *(unsigned long *)item;/*B_I_POS_UNFM_POINTER (bh, ih, 0);*/ + if (!is_block_used (unfm_ptr) && !is_block_uninsertable (unfm_ptr)) { + mark_block_used (unfm_ptr); +/* this is for check only */ +mark_block_unformatted (unfm_ptr); + } else { + unfm_ptr = 0; + } + set_ih_free_space (&indih, (ih->ih_item_len == UNFM_P_SIZE) ? get_ih_free_space (ih) : 0); + retval = g_sb.s_blocksize - get_ih_free_space (&indih); + (*pos_in_coming_item) ++; + } + } + copy_key (&(indih.ih_key), &(ih->ih_key)); + indih.ih_key.k_offset = 1; + indih.ih_key.k_uniqueness = TYPE_INDIRECT; + indih.ih_item_len = UNFM_P_SIZE; + mark_item_unaccessed (&indih); + reiserfsck_insert_item (path, &indih, (const char *)&unfm_ptr); + return retval; +} + + +/* path points to first part of tail. Function copies file tail into unformatted node and returns + its block number. If we are going to overwrite direct item then keep free space (keep_free_space + == YES). Else (we will append file) set free space to 0 */ +/* we convert direct item that is on the path to indirect. we need a number of free block for + unformatted node. 
reiserfs_new_blocknrs will start from block number returned by this function */ +static unsigned long block_to_start (struct path * path) +{ + struct buffer_head * bh; + struct item_head * ih; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + if (ih->ih_key.k_offset == 1 || PATH_LAST_POSITION (path) == 0) + return bh->b_blocknr; + + ih --; + return (B_I_POS_UNFM_POINTER (bh, ih, I_UNFM_NUM (ih) - 1)) ?: bh->b_blocknr; +} + + +static void direct2indirect (unsigned long unfm, struct path * path, int keep_free_space) +{ + struct item_head * ih; + struct key key; + struct buffer_head * unbh; + struct unfm_nodeinfo ni; + int copied = 0; + + copy_key (&key, &(PATH_PITEM_HEAD (path)->ih_key)); + + if (key.k_offset % g_sb.s_blocksize != 1) { + /* look for first part of tail */ + pathrelse (path); + key.k_offset -= (key.k_offset % g_sb.s_blocksize - 1); + if (usearch_by_key (&g_sb, &key, path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) != ITEM_FOUND) + die ("direct2indirect: can not find first part of tail"); + } + + unbh = reiserfsck_get_new_buffer (unfm ?: block_to_start (path)); + + /* delete parts of tail coping their contents to new buffer */ + do { + ih = PATH_PITEM_HEAD (path); + memcpy (unbh->b_data + copied, B_I_PITEM (PATH_PLAST_BUFFER (path), ih), ih->ih_item_len); + + save_unfm_overwriting (unbh->b_blocknr, ih); + + copied += ih->ih_item_len; + key.k_offset += ih->ih_item_len; + reiserfsck_delete_item (path); + } while (/*reiserfsck_*/usearch_by_key (&g_sb, &key, path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_keys) == ITEM_FOUND); + + pathrelse (path); + + /* paste or insert pointer to the unformatted node */ + key.k_offset -= copied; + ni.unfm_nodenum = unbh->b_blocknr; + ni.unfm_freespace = (keep_free_space == 1) ? 
(g_sb.s_blocksize - copied) : 0; + +/* this is for check only */ +mark_block_unformatted (ni.unfm_nodenum); + + if (usearch_by_position (&g_sb, &key, path) == FILE_NOT_FOUND) { + struct item_head insih; + + copy_key (&(insih.ih_key), &key); + insih.ih_key.k_uniqueness = TYPE_INDIRECT; + set_ih_free_space (&insih, ni.unfm_freespace); + mark_item_unaccessed (&insih); + insih.ih_item_len = UNFM_P_SIZE; + reiserfsck_insert_item (path, &insih, (const char *)&(ni.unfm_nodenum)); + } else { + ih = PATH_PITEM_HEAD (path); + if (!I_IS_INDIRECT_ITEM (ih) || ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) != key.k_offset) + die ("direct2indirect: incorrect item found"); + reiserfsck_paste_into_item (path, (const char *)&ni, UNFM_P_SIZE); + } + + mark_buffer_dirty (unbh, 0); + mark_buffer_uptodate (unbh, 0); + brelse (unbh); + + if (usearch_by_position (&g_sb, &key, path) != BYTE_FOUND || !I_IS_INDIRECT_ITEM (PATH_PITEM_HEAD (path))) + die ("direct2indirect: position not found"); + return; +} + + + + +static int append_to_unformatted_node (struct item_head * comingih, struct item_head * ih, char * item, struct path * path) +{ + struct buffer_head * bh, * unbh; + int end_of_data = g_sb.s_blocksize - get_ih_free_space (ih); + int offset = comingih->ih_key.k_offset % g_sb.s_blocksize - 1; + int zero_number = offset - end_of_data; + __u32 unfm_ptr; + + /* append to free space of the last unformatted node of indirect item ih */ + if (get_ih_free_space (ih) < comingih->ih_item_len) + die ("reiserfsck_append_file: there is no enough free space in unformatted node"); + + bh = PATH_PLAST_BUFFER (path); + + unfm_ptr = B_I_POS_UNFM_POINTER (bh, ih, I_UNFM_NUM (ih) - 1); + if (unfm_ptr == 0 || unfm_ptr >= SB_BLOCK_COUNT (&g_sb)) { + unbh = reiserfsck_get_new_buffer (bh->b_blocknr); + B_I_POS_UNFM_POINTER (bh, ih, I_UNFM_NUM (ih) - 1) = unbh->b_blocknr; + mark_block_unformatted (unbh->b_blocknr); + mark_buffer_dirty (bh, 0); + } else { + unbh = bread (g_sb.s_dev, unfm_ptr, 
g_sb.s_blocksize); + if (!is_block_used (unfm_ptr)) + die ("append_to_unformatted_node: unused block %d", unfm_ptr); + + } + memset (unbh->b_data + end_of_data, 0, zero_number); + memcpy (unbh->b_data + offset, item, comingih->ih_item_len); + + save_unfm_overwriting (unbh->b_blocknr, comingih); + + set_ih_free_space (ih, get_ih_free_space (ih) - (zero_number + comingih->ih_item_len)); + memset (unbh->b_data + offset + comingih->ih_item_len, 0, get_ih_free_space (ih)); + mark_buffer_uptodate (unbh, 0); + mark_buffer_dirty (unbh, 0); + brelse (unbh); + pathrelse (path); + return comingih->ih_item_len; +} + + +static void adjust_free_space (struct buffer_head * bh, struct item_head * ih, struct item_head * comingih) +{ + if (I_IS_INDIRECT_ITEM (comingih)) { + set_ih_free_space (ih, 0); + } else { + if (comingih->ih_key.k_offset < ih->ih_key.k_offset + g_sb.s_blocksize * I_UNFM_NUM (ih)) + /* append to the last unformatted node */ + set_ih_free_space (ih, g_sb.s_blocksize - ih->ih_key.k_offset % g_sb.s_blocksize + 1); + else + set_ih_free_space (ih, 0); + } + + mark_buffer_dirty (bh, 0); +} + + +/* this appends file with one unformatted node pointer (since balancing algorithm limitation). 
This + pointer can be 0, or new allocated block or pointer from indirect item that is being inserted + into tree */ +int reiserfsck_append_file (struct item_head * comingih, char * item, int pos, struct path * path) +{ + struct unfm_nodeinfo ni; + struct buffer_head * unbh; + int retval; +/* int keep_free_space;*/ + struct item_head * ih = PATH_PITEM_HEAD (path); + + if (!I_IS_INDIRECT_ITEM (ih)) + die ("reiserfsck_append_file: can not append to non-indirect item"); + + if (ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize) != comingih->ih_key.k_offset) { + adjust_free_space (PATH_PLAST_BUFFER (path), ih, comingih); + } + + if (I_IS_DIRECT_ITEM (comingih)) { + if (comingih->ih_key.k_offset < ih->ih_key.k_offset + g_sb.s_blocksize * I_UNFM_NUM (ih)) { + /* direct item fits to free space of indirect item */ + return append_to_unformatted_node (comingih, ih, item, path); + } + + unbh = reiserfsck_get_new_buffer (PATH_PLAST_BUFFER (path)->b_blocknr); + /* this is for check only */ + mark_block_unformatted (unbh->b_blocknr); + memcpy (unbh->b_data + comingih->ih_key.k_offset % unbh->b_size - 1, item, comingih->ih_item_len); + + save_unfm_overwriting (unbh->b_blocknr, comingih); + + mark_buffer_dirty (unbh, 0); + mark_buffer_uptodate (unbh, 0); + + ni.unfm_nodenum = unbh->b_blocknr; + ni.unfm_freespace = g_sb.s_blocksize - comingih->ih_item_len - (comingih->ih_key.k_offset % unbh->b_size - 1); + brelse (unbh); + retval = comingih->ih_item_len; + } else { + /* coming item is indirect item */ + if (comingih->ih_key.k_offset + pos * g_sb.s_blocksize != ih->ih_key.k_offset + I_BYTES_NUMBER (ih, g_sb.s_blocksize)) + die ("reiserfsck_append_file: can not append indirect item (%lu) to position (%lu + %lu)", + comingih->ih_key.k_offset, ih->ih_key.k_offset, I_BYTES_NUMBER (ih, g_sb.s_blocksize)); + + /* take unformatted pointer from an indirect item */ + ni.unfm_nodenum = *(unsigned long *)(item + pos * UNFM_P_SIZE);/*B_I_POS_UNFM_POINTER (bh, ih, pos);*/ + if 
(!is_block_used (ni.unfm_nodenum) && !is_block_uninsertable (ni.unfm_nodenum)) { + mark_block_used (ni.unfm_nodenum); + + /* this is for check only */ + mark_block_unformatted (ni.unfm_nodenum); + } else { + ni.unfm_nodenum = 0; + } + ni.unfm_freespace = ((pos == (I_UNFM_NUM (comingih) - 1)) ? get_ih_free_space (comingih) : 0); + retval = g_sb.s_blocksize - ni.unfm_freespace; + } + + reiserfsck_paste_into_item (path, (const char *)&ni, UNFM_P_SIZE); + return retval; +} + + +int must_there_be_a_hole (struct item_head * comingih, struct path * path) +{ + struct item_head * ih = PATH_PITEM_HEAD (path); + int keep_free_space; + + if (I_IS_DIRECT_ITEM (ih)) { + direct2indirect (0, path, keep_free_space = 1); + ih = PATH_PITEM_HEAD (path); + } + + path->pos_in_item = I_UNFM_NUM (ih); + if (ih->ih_key.k_offset + (I_UNFM_NUM (ih) + 1) * g_sb.s_blocksize <= comingih->ih_key.k_offset) + return 1; + + return 0; +} + + +int reiserfs_append_zero_unfm_ptr (struct path * path) +{ + struct unfm_nodeinfo ni; + int keep_free_space; + + ni.unfm_nodenum = 0; + ni.unfm_freespace = 0; + + if (I_IS_DIRECT_ITEM (PATH_PITEM_HEAD (path))) + /* convert direct item to indirect */ + direct2indirect (0, path, keep_free_space = 0); + + reiserfsck_paste_into_item (path, (const char *)&ni, UNFM_P_SIZE); + return 0; +} + + +/* write direct item to unformatted node */ +static int overwrite_by_direct_item (struct item_head * comingih, char * item, struct path * path) +{ + unsigned long unfm_ptr; + struct buffer_head * unbh, * bh; + struct item_head * ih; + int offset; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + unfm_ptr = B_I_POS_UNFM_POINTER (bh, ih, path->pos_in_item); + if (unfm_ptr == 0 || unfm_ptr >= SB_BLOCK_COUNT (&g_sb)) { + unbh = reiserfsck_get_new_buffer (PATH_PLAST_BUFFER (path)->b_blocknr); + B_I_POS_UNFM_POINTER (bh, ih, path->pos_in_item) = unbh->b_blocknr; +/* this is for check only */ +mark_block_unformatted (unbh->b_blocknr); + mark_buffer_dirty (bh, 0); + } 
+ else { + unbh = bread (g_sb.s_dev, unfm_ptr, bh->b_size); + if (!is_block_used (unfm_ptr)) + die ("overwrite_by_direct_item: unused block %d", unfm_ptr); + } + + offset = comingih->ih_key.k_offset % bh->b_size - 1; + if (offset + comingih->ih_item_len > MAX_DIRECT_ITEM_LEN (bh->b_size)) + die ("overwrite_by_direct_item: direct item too long (offset=%lu, length=%u)", comingih->ih_key.k_offset, comingih->ih_item_len); + + memcpy (unbh->b_data + offset, item, comingih->ih_item_len); + + save_unfm_overwriting (unbh->b_blocknr, comingih); + + if (path->pos_in_item == I_UNFM_NUM (ih) - 1 && (bh->b_size - get_ih_free_space (ih)) < (offset + comingih->ih_item_len)) { + set_ih_free_space (ih, bh->b_size - (offset + comingih->ih_item_len)); + mark_buffer_dirty (bh, 0); + } + mark_buffer_dirty (unbh, 0); + mark_buffer_uptodate (unbh, 0); + brelse (unbh); + return comingih->ih_item_len; +} + + + +void overwrite_unfm_by_unfm (unsigned long unfm_in_tree, unsigned long coming_unfm, int bytes_in_unfm) +{ + struct overwritten_unfm_segment * unfm_os_list;/* list of overwritten segments of the unformatted node */ + struct overwritten_unfm_segment unoverwritten_segment; + struct buffer_head * bh_in_tree, * coming_bh; + + if (!test_bit (coming_unfm % (g_sb.s_blocksize * 8), SB_AP_CAUTIOUS_BITMAP (&g_sb)[coming_unfm / (g_sb.s_blocksize * 8)]->b_data)) + /* block (pointed by indirect item) is free, we do not have to keep its contents */ + return; + + /* coming block is marked as used in disk bitmap. 
Put its contents to block in tree preserving + everything, what has been overwritten there by direct items */ + unfm_os_list = find_overwritten_unfm (unfm_in_tree, bytes_in_unfm, &unoverwritten_segment); + if (unfm_os_list) { + add_event (UNFM_OVERWRITING_UNFM); + bh_in_tree = bread (g_sb.s_dev, unfm_in_tree, g_sb.s_blocksize); + coming_bh = bread (g_sb.s_dev, coming_unfm, g_sb.s_blocksize); + + while (get_unoverwritten_segment (unfm_os_list, &unoverwritten_segment)) { + if (unoverwritten_segment.ous_begin < 0 || unoverwritten_segment.ous_end > bytes_in_unfm - 1 || + unoverwritten_segment.ous_begin > unoverwritten_segment.ous_end) + die ("overwrite_unfm_by_unfm: invalid segment found (%d %d)", unoverwritten_segment.ous_begin, unoverwritten_segment.ous_end); + + memcpy (bh_in_tree->b_data + unoverwritten_segment.ous_begin, coming_bh->b_data + unoverwritten_segment.ous_begin, + unoverwritten_segment.ous_end - unoverwritten_segment.ous_begin + 1); + mark_buffer_dirty (bh_in_tree, 0); + } + + brelse (bh_in_tree); + brelse (coming_bh); + } +} + + +/* put unformatted node pointers from incoming item over the in-tree ones */ +static int overwrite_by_indirect_item (struct item_head * comingih, unsigned long * coming_item, struct path * path, int * pos_in_coming_item) +{ + struct buffer_head * bh = PATH_PLAST_BUFFER (path); + struct item_head * ih = PATH_PITEM_HEAD (path); + int written; + unsigned long * item_in_tree; + int src_unfm_ptrs, dest_unfm_ptrs, to_copy; + int i; + + + item_in_tree = (unsigned long *)B_I_PITEM (bh, ih) + path->pos_in_item; + coming_item += *pos_in_coming_item; + + dest_unfm_ptrs = I_UNFM_NUM (ih) - path->pos_in_item; + src_unfm_ptrs = I_UNFM_NUM (comingih) - *pos_in_coming_item; + + if (dest_unfm_ptrs >= src_unfm_ptrs) { + /* whole coming item (comingih) fits into item in tree (ih) starting with path->pos_in_item */ + written = I_BYTES_NUMBER (comingih, g_sb.s_blocksize) - *pos_in_coming_item * g_sb.s_blocksize; + *pos_in_coming_item = I_UNFM_NUM 
(comingih); + to_copy = src_unfm_ptrs; + if (dest_unfm_ptrs == src_unfm_ptrs) + set_ih_free_space (ih, get_ih_free_space (comingih));/*??*/ + } else { + /* only part of coming item overlaps item in the tree */ + *pos_in_coming_item += dest_unfm_ptrs; + written = dest_unfm_ptrs * g_sb.s_blocksize; + to_copy = dest_unfm_ptrs; + set_ih_free_space (ih, 0); + } + + for (i = 0; i < to_copy; i ++) { + if (!is_block_used (coming_item[i]) && !is_block_uninsertable (coming_item[i])) { + if (item_in_tree[i]) { + /* do not overwrite unformatted pointer. We must save everything what is there already from + direct items */ + overwrite_unfm_by_unfm (item_in_tree[i], coming_item[i], g_sb.s_blocksize); + } else { + item_in_tree[i] = coming_item[i]; + mark_block_used (coming_item[i]); +/* this is for check only */ +mark_block_unformatted (coming_item[i]); + } + } + } + mark_buffer_dirty (bh, 0); + return written; +} + + +int reiserfsck_overwrite_file (struct item_head * comingih, char * item, struct path * path, int * pos_in_coming_item) +{ + __u32 unfm_ptr; + int written = 0; + int keep_free_space; + struct item_head * ih = PATH_PITEM_HEAD (path); + + if (comp_short_keys (ih, &(comingih->ih_key)) != KEYS_IDENTICAL) + die ("reiserfsck_overwrite_file: found [%lu %lu], new item [%lu %lu]", ih->ih_key.k_dir_id, ih->ih_key.k_objectid, + comingih->ih_key.k_dir_id, comingih->ih_key.k_objectid); + + if (I_IS_DIRECT_ITEM (ih)) { + unfm_ptr = 0; + if (I_IS_INDIRECT_ITEM (comingih)) { + if (ih->ih_key.k_offset % g_sb.s_blocksize != 1) + die ("reiserfsck_overwrite_file: second part of tail can not be overwritten by indirect item"); + /* use pointer from coming indirect item */ + unfm_ptr = *(__u32 *)(item + *pos_in_coming_item * UNFM_P_SIZE); + if (unfm_ptr >= SB_BLOCK_COUNT (&g_sb) || is_block_used (unfm_ptr) || + !was_block_used (unfm_ptr) || is_block_uninsertable (unfm_ptr)) + unfm_ptr = 0; + } + /* */ + direct2indirect (unfm_ptr, path, keep_free_space = 1); + } + + if (I_IS_DIRECT_ITEM 
(comingih)) { + written = overwrite_by_direct_item (comingih, item, path); + } else { + written = overwrite_by_indirect_item (comingih, (unsigned long *)item, path, pos_in_coming_item); + } + + return written; +} + + +/* + */ +int reiserfsck_file_write (struct item_head * ih, char * item) +{ + struct path path; + struct item_head * path_ih; + int count, pos_in_coming_item; + int retval; + struct key key; + int written; + + if (make_file_writeable (ih) == -1) + /* write was not completed. Skip that item. Maybe it should be + saved to lost_found */ + return 0; + + count = I_BYTES_NUMBER (ih, g_sb.s_blocksize); + pos_in_coming_item = 0; + + copy_key (&key, &(ih->ih_key)); + while (count) { + retval = usearch_by_position (&g_sb, &key, &path); + if (retval == DIRECTORY_FOUND) { + pathrelse (&path); + return 0; + } + if (retval == BYTE_FOUND) { + written = reiserfsck_overwrite_file (ih, item, &path, &pos_in_coming_item); + count -= written; + key.k_offset += written; + } + if (retval == FILE_NOT_FOUND) { + written = create_first_item_of_file (ih, item, &path, &pos_in_coming_item); + count -= written; + key.k_offset += written; + } + if (retval == BYTE_NOT_FOUND) { + path_ih = PATH_PITEM_HEAD (&path); + if (must_there_be_a_hole (ih, &path) == 1) + reiserfs_append_zero_unfm_ptr (&path); + else { + count -= reiserfsck_append_file (ih, item, pos_in_coming_item, &path); + key.k_offset += g_sb.s_blocksize; + pos_in_coming_item ++; + } + } + if (count < 0) + die ("reiserfsck_file_write: count < 0 (%d)", count); + pathrelse (&path); + } + + return I_BYTES_NUMBER (ih, g_sb.s_blocksize); +} + + + diff -u -r --new-file linux/fs/reiserfs/utils/fsck/uobjectid.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/uobjectid.c --- linux/fs/reiserfs/utils/fsck/uobjectid.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/uobjectid.c Tue May 11 16:17:29 1999 @@ -0,0 +1,188 @@ +/* + * Copyright 1996, 1997 Hans Reiser + */ + +/*#include <stdio.h> +#include <string.h>*/ +/*#include 
<asm/bitops.h> +#include "../include/reiserfs_fs.h" +#include "../include/reiserfs_fs_sb.h" +#include "../include/reiserfslib.h"*/ +#include "fsck.h" + + +void mark_objectid_as_used (unsigned long objectid) +{ + unsigned long * objectid_map; + + + objectid_map = (unsigned long *)(SB_DISK_SUPER_BLOCK (&g_sb) + 1); + if (objectid >= objectid_map[1]) { + objectid_map[1] = objectid + 1; + } + +} + + +#if 0 + + + +int is_objectid_used (unsigned long objectid) +{ + unsigned long * objectid_map; + int i = 0; + + objectid_map = (unsigned long *)(SB_DISK_SUPER_BLOCK (&g_sb) + 1); + + while (i < SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize) { + if (objectid == objectid_map[i]) { + return 1; /* objectid is used */ + } + + if (objectid > objectid_map[i] && objectid < objectid_map[i+1]) { + return 1; /* objectid is used */ + } + + if (objectid < objectid_map[i]) + break; + + i += 2; + } + + /* objectid is free */ + return 0; +} + + +/* we mark objectid as used. Additionally, some unused objectids can + become used. It is ok. 
What is unacceptable, it is when used + objectids are marked as unused */ +void mark_objectid_as_used (unsigned long objectid) +{ + int i; + unsigned long * objectid_map; + + if (is_objectid_used (objectid) == 1) { + + /*print_objectid_map (&g_sb);*/ + /*printf ("mark_objectid_as_used: objectid %lu is used", objectid);*/ + return; + } + + objectid_map = (unsigned long *)(SB_DISK_SUPER_BLOCK (&g_sb) + 1); + + for (i = 0; i < SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize; i += 2) { + if (objectid >= objectid_map [i] && objectid < objectid_map [i + 1]) + /* it is used */ + return; + + if (objectid + 1 == objectid_map[i]) { + /* size of objectid map is the same */ + objectid_map[i] = objectid; + return; + } + + if (objectid == objectid_map[i + 1]) { + /* size of objectid map is decreased */ + objectid_map[i + 1] ++; + if (i + 2 < SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize) { + if (objectid_map[i + 1] == objectid_map[i + 2]) { + memmove (objectid_map + i + 1, objectid_map + i + 1 + 2, + (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize - (i + 2 + 2 - 1)) * sizeof (unsigned long)); + SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize -= 2; + } + } + return; + } + + if (objectid < objectid_map[i]) { + /* size of objectid map must be increased */ + if (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize == SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) { + /* here all objectids between objectid and objectid_map[i] get used */ + objectid_map[i] = objectid; + return; + } else { + memmove (objectid_map + i + 2, objectid_map + i, (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize - i) * sizeof (unsigned long)); + SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize += 2; + } + + objectid_map[i] = objectid; + objectid_map[i+1] = objectid + 1; + return; + } + + } + + /* write out of current objectid map, if we have space */ + if (i < SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) { + objectid_map[i] = objectid; + objectid_map[i + 1] = objectid + 1; + SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize += 2; + } else if (i == 
SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) { + objectid_map[i - 1] = objectid + 1; + } else + die ("mark_objectid_as_used: objectid map corrupted"); + + return; +} + + +void mark_objectid_as_free (unsigned long objectid) +{ + unsigned long * oids; /* pointer to objectid map */ + int i = 0; + + oids = (unsigned long *)(SB_DISK_SUPER_BLOCK (&g_sb) + 1); + + while (i < SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize) + { + if (objectid == oids[i]) + { + if (i == 0) + die ("mark_objectid_as_free: trying to free root object id"); + oids[i]++; + + if (oids[i] == oids[i+1]) + { + /* shrink objectid map */ + if (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize < i + 2) + die ("mark_objectid_as_free: bad cur size"); + + memmove (oids + i, oids + i + 2, (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize - i - 2) * sizeof (unsigned long)); + SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize -= 2; + if (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize < 2 || SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize > SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) + die("mark_objectid_as_free: bad cur size"); + } + return; + } + + if (objectid > oids[i] && objectid < oids[i+1]) + { + /* size of objectid map is not changed */ + if (objectid + 1 == oids[i+1]) + { + oids[i+1]--; + return; + } + + if (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize == SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) + /* objectid map must be expanded, but there is no space */ + return; + + /* expand the objectid map*/ + memmove (oids + i + 3, oids + i + 1, (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize - i - 1) * sizeof (unsigned long)); + oids[i+1] = objectid; + oids[i+2] = objectid + 1; + SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize += 2; + if (SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize < 2 || SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_cursize > SB_DISK_SUPER_BLOCK (&g_sb)->s_oid_maxsize) + die ("objectid_release: bad cur size"); + return; + } + i += 2; + } + + die ("objectid_release: trying to free free object id (%lu)", objectid); +} + +#endif diff -u -r 
--new-file linux/fs/reiserfs/utils/fsck/ustree.c v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ustree.c --- linux/fs/reiserfs/utils/fsck/ustree.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/fsck/ustree.c Sun May 14 23:37:12 2000 @@ -0,0 +1,445 @@ +/* + * Copyright 1996, 1997 Hans Reiser + */ +/*#include <stdio.h> +#include <string.h>*/ +/*#include <asm/bitops.h> +#include "../include/reiserfs_fs.h" +#include "../include/reiserfs_fs_sb.h" +#include "../include/reiserfslib.h"*/ +#include "fsck.h" + +static inline int compare_keys (unsigned long * key1, unsigned long * key2, int length) +{ + for (; length--; ++key1, ++key2) { + if ( *key1 < *key2 ) + return SECOND_GREATER; + if ( *key1 > *key2 ) + return FIRST_GREATER; + } + + return KEYS_IDENTICAL; +} + + +/* compare 3 components of key */ +int comp_keys_3 (void * key1, void * key2) +{ + return compare_keys (key1, key2, 3); +} + + +/* compare 4 components of key */ +int comp_dir_entries (void * key1, void * key2) +{ + return compare_keys (key1, key2, 1); +} + +void init_tb_struct (struct tree_balance * tb, struct super_block * s, struct path * path, int size) +{ + memset (tb, '\0', sizeof(struct tree_balance)); + tb->tb_sb = s; + tb->tb_path = path; + PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; + tb->insert_size[0] = size; +} + +struct tree_balance * cur_tb = 0; + +void reiserfsck_paste_into_item (struct path * path, const char * body, int size) +{ + struct tree_balance tb; + + init_tb_struct (&tb, &g_sb, path, size); + if (fix_nodes (M_PASTE, &tb, 0) != CARRY_ON) + die ("reiserfsck_paste_into_item: fix_nodes failed"); + + do_balance (&tb, 0, body, M_PASTE, 0); +} + + +void reiserfsck_insert_item (struct path * path, struct item_head * ih, const char * body) +{ + struct tree_balance tb; + + init_tb_struct (&tb, &g_sb, path, IH_SIZE + ih->ih_item_len); + path->pos_in_item = 0; + if (fix_nodes (M_INSERT, &tb, ih) != 
CARRY_ON) + die ("reiserfsck_insert_item: fix_nodes failed"); + + do_balance (&tb, ih, body, M_INSERT, 0); +} + + +static void free_unformatted_nodes (struct item_head * ih, struct buffer_head * bh) +{ + unsigned long * punfm = (unsigned long *)B_I_PITEM (bh, ih); + int i; + + for (i = 0; i < I_UNFM_NUM (ih); i ++, punfm ++) + if (*punfm) { + struct buffer_head * to_be_forgotten; + + to_be_forgotten = find_buffer (g_sb.s_dev, *punfm, g_sb.s_blocksize); + if (to_be_forgotten) { + to_be_forgotten->b_count ++; + bforget (to_be_forgotten); + } + reiserfs_free_block (&g_sb, *punfm); +/* this is for check only */ + unmark_block_unformatted (*punfm); + } +} + + +void reiserfsck_delete_item (struct path * path) +{ + struct tree_balance tb; + struct item_head * ih = PATH_PITEM_HEAD (path); + + if (I_IS_INDIRECT_ITEM (ih)) + free_unformatted_nodes (ih, PATH_PLAST_BUFFER (path)); + + init_tb_struct (&tb, &g_sb, path, -(IH_SIZE + ih->ih_item_len)); + path->pos_in_item = 0; + if (fix_nodes (M_DELETE, &tb, 0) != CARRY_ON) + die ("reiserfsck_delete_item: fix_nodes failed"); + + do_balance (&tb, 0, 0, M_DELETE, 0); +} + + +void reiserfsck_cut_from_item (struct path * path, int cut_size) +{ + struct tree_balance tb; + struct item_head * ih; + + if (cut_size >= 0) + die ("reiserfsck_cut_from_item: cut size == %d", cut_size); + + if (I_IS_INDIRECT_ITEM (ih = PATH_PITEM_HEAD (path))) { + __u32 unfm_ptr = B_I_POS_UNFM_POINTER (PATH_PLAST_BUFFER (path), ih, I_UNFM_NUM (ih) - 1); + if (unfm_ptr) { + struct buffer_head * to_be_forgotten; + + to_be_forgotten = find_buffer (g_sb.s_dev, unfm_ptr, g_sb.s_blocksize); + if (to_be_forgotten) { + to_be_forgotten->b_count ++; + bforget (to_be_forgotten); + } + reiserfs_free_block (&g_sb, unfm_ptr); +/* this is for check only */ + unmark_block_unformatted (unfm_ptr); + } + } + + + init_tb_struct (&tb, &g_sb, path, cut_size); + if (fix_nodes (M_CUT, &tb, 0) != CARRY_ON) + die ("reiserfsck_cut_from_item: fix_nodes failed"); + + do_balance (&tb, 0, 0, 
M_CUT, 0); +} + + +/* uget_lkey is utils clone of stree.c/get_lkey */ +struct key * uget_lkey (struct path * path) +{ + int pos, offset = path->path_length; + struct buffer_head * bh; + + if (offset < FIRST_PATH_ELEMENT_OFFSET) + die ("uget_lkey: illegal offset in the path (%d)", offset); + + + /* While not higher in path than first element. */ + while (offset-- > FIRST_PATH_ELEMENT_OFFSET) { + if (! buffer_uptodate (PATH_OFFSET_PBUFFER (path, offset)) ) + die ("uget_lkey: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (! B_IS_IN_TREE (bh = PATH_OFFSET_PBUFFER (path, offset))) + die ("uget_lkey: buffer on the path is not in tree"); + + /* Check whether position in the parent is correct. */ + if ((pos = PATH_OFFSET_POSITION (path, offset)) > B_NR_ITEMS (bh)) + die ("uget_lkey: invalid position (%d) in the path", pos); + + /* Check whether parent at the path really points to the child. */ + if (B_N_CHILD_NUM (bh, pos) != PATH_OFFSET_PBUFFER (path, offset + 1)->b_blocknr) + die ("uget_lkey: invalid block number (%d). Must be %d", + B_N_CHILD_NUM (bh, pos), PATH_OFFSET_PBUFFER (path, offset + 1)->b_blocknr); + + /* Return delimiting key if position in the parent is not equal to zero. */ + if (pos) + return B_N_PDELIM_KEY(bh, pos - 1); + } + + /* we must be in the root */ +/* + if (PATH_OFFSET_PBUFFER (path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr != SB_ROOT_BLOCK (&g_sb)) + die ("get_left_dkey: path does not start with the root"); +*/ + + /* there is no left delimiting key */ + return 0; +} + + +/* uget_rkey is utils clone of stree.c/get_rkey */ +struct key * uget_rkey (struct path * path) +{ + int pos, offset = path->path_length; + struct buffer_head * bh; + + if (offset < FIRST_PATH_ELEMENT_OFFSET) + die ("uget_rkey: illegal offset in the path (%d)", offset); + + while (offset-- > FIRST_PATH_ELEMENT_OFFSET) { + if (! 
buffer_uptodate (PATH_OFFSET_PBUFFER (path, offset))) + die ("uget_rkey: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (! B_IS_IN_TREE (bh = PATH_OFFSET_PBUFFER (path, offset))) + die ("uget_rkey: buffer on the path is not in tree"); + + /* Check whether position in the parent is correct. */ + if ((pos = PATH_OFFSET_POSITION (path, offset)) > B_NR_ITEMS (bh)) + die ("uget_rkey: invalid position (%d) in the path", pos); + + /* Check whether parent at the path really points to the child. */ + if (B_N_CHILD_NUM (bh, pos) != PATH_OFFSET_PBUFFER (path, offset + 1)->b_blocknr) + die ("uget_rkey: invalid block number (%d). Must be %d", + B_N_CHILD_NUM (bh, pos), PATH_OFFSET_PBUFFER (path, offset + 1)->b_blocknr); + + /* Return delimiting key if position in the parent is not the last one. */ + if (pos != B_NR_ITEMS (bh)) + return B_N_PDELIM_KEY(bh, pos); + } + + /* we must be in the root */ +/* + if (PATH_OFFSET_PBUFFER (path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr != SB_ROOT_BLOCK (&g_sb)) + die ("get_left_dkey: path does not start with the root"); +*/ + /* there is no right delimiting key */ + return 0; +} + + +static inline int ubin_search (void * key, void * base, int num, int width, int *ppos, comp_function_t comp_func) +{ + int rbound, lbound, j; + + lbound = 0; + rbound = num - 1; + for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { + switch (comp_func ((void *)((char *)base + j * width), key ) ) { + case SECOND_GREATER: + lbound = j + 1; + continue; + + case FIRST_GREATER: + rbound = j - 1; + continue; + + case KEYS_IDENTICAL: + *ppos = j; + return KEY_FOUND; + } + } + + *ppos = lbound; + return KEY_NOT_FOUND; +} + + +/* this searches in tree through items */ +int usearch_by_key (struct super_block * s, struct key * key, struct path * path, int * repeat, int stop_level, int bread_par, + comp_function_t comp_func) +{ + struct buffer_head * bh; + unsigned long block =
s->u.reiserfs_sb.s_rs->s_root_block; + struct path_element * curr; + + if (comp_func == 0) + comp_func = comp_keys; + if (repeat) + *repeat = CARRY_ON; + + path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + while (1) { + curr = PATH_OFFSET_PELEMENT (path, ++ path->path_length); + bh = curr->pe_buffer = bread (s->s_dev, block, s->s_blocksize); + if (ubin_search (key, B_N_PKEY (bh, 0), B_NR_ITEMS (bh), + B_IS_ITEMS_LEVEL (bh) ? IH_SIZE : KEY_SIZE, &(curr->pe_position), comp_func) == KEY_FOUND) { + /* key found, return if this is leaf level */ + if (B_BLK_HEAD (bh)->blk_level <= stop_level) { + path->pos_in_item = 0; + return KEY_FOUND; + } + curr->pe_position ++; + } else { + /* key not found in the node */ + if (B_BLK_HEAD (bh)->blk_level <= stop_level) + return KEY_NOT_FOUND; + } + block = B_N_CHILD_NUM (bh, curr->pe_position); + } + die ("search_by_key: you can not get here"); + return 0; +} + + +/* key is key of directory entry. This searches in tree through items + and in the found directory item as well */ +int usearch_by_entry_key (struct super_block * s, struct key * key, struct path * path) +{ + struct buffer_head * bh; + struct item_head * ih; + struct key tmpkey; + + if (usearch_by_key (s, key, path, 0, DISK_LEAF_NODE_LEVEL, 0, comp_keys) == KEY_FOUND) { + path->pos_in_item = 0; + return ENTRY_FOUND; + } + + bh = PATH_PLAST_BUFFER (path); + if (PATH_LAST_POSITION (path) == 0) { + /* previous item does not exist, that means we are in leftmost + leaf of the tree */ + if (uget_lkey (path) != 0) + die ("search_by_entry_key: invalid position after search_by_key"); + if (comp_short_keys ((unsigned long *)B_N_PKEY (bh, 0), (unsigned long *)key) == KEYS_IDENTICAL) { + path->pos_in_item = 0; + return ENTRY_NOT_FOUND; + } + path->pos_in_item = 0; + return DIRECTORY_NOT_FOUND; + } + + /* take previous item */ + PATH_LAST_POSITION (path) --; + ih = PATH_PITEM_HEAD (path); + if (comp_short_keys ((unsigned long *)ih, (unsigned long *)key) != KEYS_IDENTICAL || 
!I_IS_DIRECTORY_ITEM (ih)) { + struct key * next_key; + + PATH_LAST_POSITION (path) ++; + /* previous item belongs to another object or is stat data, check next item */ + if (PATH_LAST_POSITION (path) < B_NR_ITEMS (PATH_PLAST_BUFFER (path))) { + /* found item is not last item of the node */ + next_key = B_N_PKEY (PATH_PLAST_BUFFER (path), PATH_LAST_POSITION (path)); + if (comp_short_keys ((unsigned long *)next_key, (unsigned long *)key) != KEYS_IDENTICAL) { + path->pos_in_item = 0; + return DIRECTORY_NOT_FOUND; + } + if (!KEY_IS_DIRECTORY_KEY (next_key)) + /* there is an item in the tree, but it is not a directory item */ + return REGULAR_FILE_FOUND; + } else { + /* found item is last item of the node */ + next_key = uget_rkey (path); + if (next_key == 0 || comp_short_keys ((unsigned long *)next_key, (unsigned long *)key) != KEYS_IDENTICAL) { + /* there is not any part of such directory in the tree */ + path->pos_in_item = 0; + return DIRECTORY_NOT_FOUND; + } + if (!KEY_IS_DIRECTORY_KEY (next_key)) + /* there is an item in the tree, but it is not a directory item */ + return REGULAR_FILE_FOUND; + + copy_key (&tmpkey, next_key); + pathrelse (path); + if (usearch_by_key (s, &tmpkey, path, 0, DISK_LEAF_NODE_LEVEL, 0, comp_keys) != KEY_FOUND || PATH_LAST_POSITION (path) != 0) + die ("search_by_entry_key: item not found by corresponding delimiting key"); + } + /* next item is the part of this directory */ + path->pos_in_item = 0; + return ENTRY_NOT_FOUND; + } + + /* previous item is part of desired directory */ + if (ubin_search (&(key->k_offset), B_I_DEH (bh, ih), I_ENTRY_COUNT (ih), DEH_SIZE, &(path->pos_in_item), comp_dir_entries) == KEY_FOUND) + return ENTRY_FOUND; + return ENTRY_NOT_FOUND; +} + + +/* key is key of byte in the regular file. 
This searches in tree + through items and in the found item as well */ +int usearch_by_position (struct super_block * s, struct key * key, struct path * path) +{ + struct buffer_head * bh; + struct item_head * ih; + + if (usearch_by_key (s, key, path, 0, DISK_LEAF_NODE_LEVEL, 0, comp_keys_3) == KEY_FOUND) { + ih = PATH_PITEM_HEAD (path); + if (!I_IS_DIRECT_ITEM (ih) && !I_IS_INDIRECT_ITEM (ih)) + return DIRECTORY_FOUND; + path->pos_in_item = 0; + return BYTE_FOUND; + } + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + if (PATH_LAST_POSITION (path) == 0) { + /* previous item does not exist, that means we are in leftmost leaf of the tree */ + if (comp_short_keys ((unsigned long *)B_N_PKEY (bh, 0), (unsigned long *)key) == KEYS_IDENTICAL) { + if (!I_IS_DIRECT_ITEM (ih) && !I_IS_INDIRECT_ITEM (ih)) + return DIRECTORY_FOUND; + return BYTE_NOT_FOUND; + } + return FILE_NOT_FOUND; + } + + /* take previous item */ + PATH_LAST_POSITION (path) --; + ih = PATH_PITEM_HEAD (path); + if (comp_short_keys ((unsigned long *)&ih->ih_key, (unsigned long *)key) != KEYS_IDENTICAL || + I_IS_STAT_DATA_ITEM (ih)) { + struct key * next_key; + + /* previous item belongs to another object or is a stat data, check next item */ + PATH_LAST_POSITION (path) ++; + if (PATH_LAST_POSITION (path) < B_NR_ITEMS (PATH_PLAST_BUFFER (path))) + /* next key is in the same node */ + next_key = B_N_PKEY (PATH_PLAST_BUFFER (path), PATH_LAST_POSITION (path)); + else + next_key = uget_rkey (path); + if (next_key == 0 || comp_short_keys ((unsigned long *)next_key, (unsigned long *)key) != KEYS_IDENTICAL) { + /* there is no any part of such file in the tree */ + path->pos_in_item = 0; + return FILE_NOT_FOUND; + } + + if (KEY_IS_DIRECTORY_KEY (next_key)) { + reiserfs_warning ("\ndirectory with the same key %d found\n", next_key); + return DIRECTORY_FOUND; + } + /* next item is the part of this file */ + path->pos_in_item = 0; + return BYTE_NOT_FOUND; + } + + if (I_IS_DIRECTORY_ITEM (ih)) { + 
return DIRECTORY_FOUND; + } + if (I_IS_STAT_DATA_ITEM (ih)) { + PATH_LAST_POSITION (path) ++; + return FILE_NOT_FOUND; + } + + /* previous item is part of desired file */ + if (I_K_KEY_IN_ITEM (ih, key, bh->b_size)) { + path->pos_in_item = key->k_offset - ih->ih_key.k_offset; + if ( I_IS_INDIRECT_ITEM (ih) ) + path->pos_in_item /= bh->b_size; + return BYTE_FOUND; + } + + path->pos_in_item = I_IS_INDIRECT_ITEM (ih) ? I_UNFM_NUM (ih) : ih->ih_item_len; + return BYTE_NOT_FOUND; +} + + diff -u -r --new-file linux/fs/reiserfs/utils/include/fsck.h v2.4.0-test8/linux/fs/reiserfs/utils/include/fsck.h --- linux/fs/reiserfs/utils/include/fsck.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/fsck.h Sun May 14 23:37:12 2000 @@ -0,0 +1,263 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser + */ +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <asm/types.h> +#include <sys/vfs.h> +#include <errno.h> +#include <unistd.h> +#include <asm/byteorder.h> +#include <asm/types.h> + +#include "misc.h" +#include "vfs.h" +#include "reiserfs_fs.h" + + +typedef __u32 blocknr_t; + +/* searches.c */ +#define KEY_FOUND 1 +#define KEY_NOT_FOUND 0 + +#define DIRECTORY_NOT_FOUND -1 + +#define FILE_NOT_FOUND -1 + + +#define reiserfsck_search_by_key(s,key,path,comp_func) search_by_key (s, key, path, 0, DISK_LEAF_NODE_LEVEL, READ_BLOCKS, comp_func) + + +/* main.c */ +int main (int argc, char * argv []); + +// +// options +// +extern int opt_verbose; +extern int opt_fsck; + +#define FSCK_DEFAULT 0 +#define FSCK_REBUILD 1 +#define FSCK_FIND_ITEM 2 +extern int opt_fsck_mode; + +extern struct key key_to_find; + +#define STOP_DEFAULT 0 +#define STOP_AFTER_PASS1 1 +#define STOP_AFTER_PASS2 2 +#define STOP_AFTER_SEMANTIC 3 +#define STOP_AFTER_REPLAY 4 +extern int opt_stop_point; + +#define SCAN_USED_PART 0 +#define SCAN_WHOLE_PARTITION 1 +extern int opt_what_to_scan; + +#define NO_LOST_FOUND 0 +#define DO_LOST_FOUND 
1 +extern int opt_lost_found; + + +extern struct super_block g_sb; +extern struct reiserfs_super_block * g_old_rs; + +extern char ** g_disk_bitmap; +extern char ** g_new_bitmap; +extern char ** g_uninsertable_leaf_bitmap; +extern char ** g_formatted; +extern char ** g_unformatted; +extern int g_blocks_to_read; + + +/* pass1.c */ +void build_the_tree (void); +extern int g_unaccessed_items; +int is_item_accessed (struct item_head * ih); +void mark_item_accessed (struct item_head * ih, struct buffer_head * bh); +void mark_item_unaccessed (struct item_head * ih); + + +/* file.c */ +struct si { + struct item_head si_ih; + char * si_dnm_data; + struct si * si_next; + + // changed by XB; + struct si * last_known; +}; +void put_saved_items_into_tree (struct si *); +int reiserfsck_file_write (struct item_head * ih, char * item); +int are_file_items_correct (struct key * key, unsigned long * size, int mark_passed_items, struct path *, struct stat_data **); + + +/* pass2.c */ +typedef void (action_on_item_t)(struct si **, struct item_head *, char *); +action_on_item_t save_item; +action_on_item_t insert_item_separately; +void for_all_items_in_node (action_on_item_t action, struct si ** si, struct buffer_head * bh); +void take_bad_blocks_put_into_tree (); +void insert_each_item_separately (struct buffer_head *); + + +/* semantic.c */ +extern struct key g_root_directory_key; +void semantic_pass (void); +int check_semantic_tree (struct key * key, struct key * parent, int is_dot_dot); + + + +/* pass4.c */ +int check_unaccessed_items (void); +void pass4 (struct super_block *); + + +/* check.c */ +int check_file_system (void); +void reiserfsck_check_pass1 (void); +void reiserfsck_check_after_all (void); +int is_leaf_bad (struct buffer_head * bh); +int is_internal_bad (struct buffer_head * bh); + +void check_fs_tree (struct super_block * s); + + + +/* noname.c */ +void get_max_buffer_key (struct buffer_head * bh, struct key * key); + +/* ustree.c */ +void init_tb_struct (struct 
tree_balance * tb, struct super_block * s, struct path * path, int size); +void reiserfsck_paste_into_item (struct path * path, const char * body, int size); +void reiserfsck_insert_item (struct path * path, struct item_head * ih, const char * body); +void reiserfsck_delete_item (struct path * path); +void reiserfsck_cut_from_item (struct path * path, int cut_size); +typedef int (comp_function_t)(void * key1, void * key2); +int usearch_by_key (struct super_block * s, struct key * key, struct path * path, int * repeat, int stop_level, int bread_par, + comp_function_t comp_func); +int usearch_by_entry_key (struct super_block * s, struct key * key, struct path * path); +int usearch_by_position (struct super_block * s, struct key * key, struct path * path); +struct key * uget_lkey (struct path * path); +struct key * uget_rkey (struct path * path); +int comp_keys_3 (void * key1, void * key2); +int comp_dir_entries (void * key1, void * key2); + + +/* bitmap.c */ +extern int from_journal; +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, struct super_block * s, unsigned long * free_blocknrs, unsigned long start, int amount_needed, int for_preserve_list); +void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct super_block * s, unsigned long block); +void reiserfs_free_internal_block (struct super_block * s, unsigned long block); +struct buffer_head * reiserfsck_get_new_buffer (unsigned long start); +void force_freeing (void); +int is_block_used (unsigned long block); +int was_block_used (unsigned long block); +void mark_block_used (unsigned long block); +void mark_block_uninsertable (unsigned long block); +int is_block_uninsertable (unsigned long block); +void mark_block_unformatted (unsigned long block); +void mark_block_formatted (unsigned long block); +void unmark_block_unformatted (unsigned long block); +void unmark_block_formatted (unsigned long block); + +/* objectid.c */ +int is_objectid_used (unsigned long objectid); +void 
mark_objectid_as_used (unsigned long objectid); +void mark_objectid_as_free (unsigned long objectid); +objectid_t get_unused_objectid (struct super_block * s); + + + +/* segments.c */ +struct overwritten_unfm_segment { + int ous_begin; + int ous_end; + struct overwritten_unfm_segment * ous_next; +}; +struct overwritten_unfm * look_for_overwritten_unfm (__u32); +struct overwritten_unfm_segment * find_overwritten_unfm (unsigned long unfm, int length, struct overwritten_unfm_segment * segment_to_init); +int get_unoverwritten_segment (struct overwritten_unfm_segment * list_head, struct overwritten_unfm_segment * unoverwritten_segment); +void save_unfm_overwriting (unsigned long unfm, struct item_head * direct_ih); +void free_overwritten_unfms (void); +void mark_formatted_pointed_by_indirect (__u32); +int is_formatted_pointed_by_indirect (__u32); + + +/* do_balan.c */ +/* lbalance.c */ +/* ibalance.c */ /* links to fs/reiser */ +/* fix_node.c */ +/* teahash3.c */ + + +/* info.c */ +struct fsck_stat { + /* pass 1,2 */ + int fs_good_leaves; + int fs_uninsertable_leaves; + int fs_rewritten_files; + int fs_leaves_used_by_indirect_items; + int fs_unfm_overwriting_unfm; + int fs_indirect_to_direct; + /* pass 3 */ + int fs_incorrect_regular_files; + int fs_fixed_size_directories; + int fs_fixed_size_files; + int fs_deleted_entries; + /* pass 4 */ + int fs_unaccessed_items; + int fs_fixed_right_delim_key; + /* fs stat */ + int fs_stat_data_items; + int fs_regular_files; + int fs_directories; + int fs_symlinks; + int fs_others; +}; + + +extern struct fsck_stat g_fsck_info; + +/* pass 1,2 */ +#define GOOD_LEAVES 0 +#define UNINSERTABLE_LEAVES 1 +#define REWRITTEN_FILES 2 +#define LEAVES_USED_BY_INDIRECT_ITEMS 3 +#define UNFM_OVERWRITING_UNFM 4 /* overwrite contents of unformatted node keeping what has been written there from direct items */ + +/* pass 3 (semantic) */ +#define INCORRECT_REGULAR_FILES 5 +#define FIXED_SIZE_DIRECTORIES 6 +#define FIXED_SIZE_FILES 7 +#define 
DELETED_ENTRIES 8 +#define INDIRECT_TO_DIRECT 9 + +/* pass 4 */ +#define UNACCESSED_ITEMS 10 +#define FIXED_RIGHT_DELIM_KEY 11 + +/* fs stat */ +#define STAT_DATA_ITEMS 12 +#define REGULAR_FILES 13 +#define SYMLINKS 14 +#define OTHERS 15 +#define DIRECTORIES 16 + +void add_event (int event); +int get_event (int event); +void output_information (); + + +/* journal.c */ +void replay_all (struct super_block * s); +/*int get_journal_size (struct super_block * s); +int get_journal_start (struct super_block * s);*/ +void release_journal_blocks (struct super_block * s); +void reset_journal (struct super_block * s); + diff -u -r --new-file linux/fs/reiserfs/utils/include/misc.h v2.4.0-test8/linux/fs/reiserfs/utils/include/misc.h --- linux/fs/reiserfs/utils/include/misc.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/misc.h Sun May 14 23:37:12 2000 @@ -0,0 +1,24 @@ +/* + * Copyright 1996, 1997, 1998, 1999 Hans Reiser + */ + + +void die (char * fmt, ...); +void * getmem (int size); +void freemem (const void * p); +void * expandmem (void * p, int size, int by); +int is_mounted (char * device_name); +void check_and_free_mem (void); +void print_how_far (__u32 * passed, __u32 total); +int block_write (int dev, int block, int blocksize, char * data); +int block_read (int dev, int block, int blocksize, char * data); + +loff_t reiserfs_llseek (unsigned int fd, loff_t offset, unsigned int origin); +int reiserfs_progs_set_le_bit(int, void *) ; +int reiserfs_progs_test_le_bit(int, const void *) ; + + +#if !(__GLIBC__ > 1 && __GLIBC_MINOR__ > 0) +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; +#endif diff -u -r --new-file linux/fs/reiserfs/utils/include/nokernel.h v2.4.0-test8/linux/fs/reiserfs/utils/include/nokernel.h --- linux/fs/reiserfs/utils/include/nokernel.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/nokernel.h Sun May 14 23:37:12 2000 @@ -0,0 +1,102 @@ +/* + * this is to be included by all kernel 
files if __KERNEL__ undefined + */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <asm/types.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <malloc.h> +#include <sys/vfs.h> +#include <time.h> +#include <sys/mount.h> +#include <limits.h> + +#ifndef __alpha__ +#include <asm/bitops.h> +#endif + +#include "misc.h" +#include "vfs.h" + + +#define ext2_set_bit test_and_set_bit +#define ext2_clear_bit test_and_clear_bit +#define ext2_test_bit test_bit +#define ext2_find_next_zero_bit find_next_zero_bit + +#include "reiserfs_fs.h" + +#define unlock_kernel() {;} +#define lock_kernel() {;} + + +// +// util versions of reiserfs kernel functions (./lib/vfs.c) +// +extern inline struct buffer_head * reiserfs_bread (kdev_t dev, int block, int size) +{ + return bread (dev, block, size); +} + +extern inline struct buffer_head * reiserfs_getblk (kdev_t dev, int block, int size) +{ + return getblk (dev, block, size); +} + + +#define init_special_inode(a,b,c) {;} +#define list_empty(a) 0 + +// +// fs/reiserfs/buffer.c +// +//#define reiserfs_file_buffer(bh,state) do {} while (0) +//#define reiserfs_journal_end_io 0 +//#define reiserfs_end_buffer_io_sync 0 + +// +// fs/reiserfs/journal.c +// +#define journal_mark_dirty(th,s,bh) mark_buffer_dirty (bh, 1) +#define journal_mark_dirty_nolog(th,s,bh) mark_buffer_dirty (bh, 1) +#define mark_buffer_journal_new(bh) mark_buffer_dirty (bh, 1) + +extern inline int flush_old_commits (struct super_block * s, int i) +{ + return 0; +} + +#define journal_begin(th,s,n) do {int fu = n;fu++;(th)->t_super = s;} while (0) +#define journal_release(th,s) do {} while (0) +#define journal_release_error(th,s) do {} while (0) +#define journal_init(s) 0 +#define journal_end(th,s,n) do {s=s;} while (0) +#define buffer_journaled(bh) 0 +#define journal_lock_dobalance(s) do {} while (0) +#define journal_unlock_dobalance(s) do {} while (0) +#define journal_transaction_should_end(th,n) 1 +#define 
push_journal_writer(s) 1 +#define pop_journal_writer(n) do {} while (0) +#define journal_end_sync(th,s,n) do {} while (0) +#define journal_mark_freed(th,s,n) do {} while (0) +#define reiserfs_in_journal(a,b,c,d,e,f) 0 +#define flush_async_commits(s) do {} while (0) + +// +// fs/reiserfs/resize.c +// +#define reiserfs_resize(s,n) do {} while (0) +#define simple_strtoul strtol + +// +// +// +#define EHASHCOLLISION 125 + +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) + +extern struct super_block g_sb; +#define get_super(a) (&g_sb) diff -u -r --new-file linux/fs/reiserfs/utils/include/reiserfs.h v2.4.0-test8/linux/fs/reiserfs/utils/include/reiserfs.h --- linux/fs/reiserfs/utils/include/reiserfs.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/reiserfs.h Sun May 14 23:15:03 2000 @@ -0,0 +1,23 @@ +/* + * Copyright 1996-2000 Hans Reiser + */ + +// +// ./fs/reiserfs/utils/lib/reiserfs.c +// +int not_formatted_node (char * buf, int blocksize); +int not_data_block (struct super_block * s, b_blocknr_t block); +int uread_super_block (struct super_block * s); +int uread_bitmaps (struct super_block * s); + + +#define bh_desc(bh) ((struct reiserfs_journal_desc *)((bh)->b_data)) +#define bh_commit(bh) ((struct reiserfs_journal_commit *)((bh)->b_data)) +int get_journal_start (struct super_block * s); +int get_journal_size (struct super_block * s); +int is_desc_block (struct buffer_head * bh); +int does_desc_match_commit (struct reiserfs_journal_desc * desc, + struct reiserfs_journal_commit * commit); + +void make_dir_stat_data (struct key * dir_key, struct item_head * ih, + struct stat_data * sd); diff -u -r --new-file linux/fs/reiserfs/utils/include/resize.h v2.4.0-test8/linux/fs/reiserfs/utils/include/resize.h --- linux/fs/reiserfs/utils/include/resize.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/resize.h Sun May 14 23:15:03 2000 @@ -0,0 +1,13 @@ +/* + * Copyright 1999 Hans Reiser, see README file for licensing details. 
+ */ + +#define print_usage_and_exit()\ + die ("Usage: %s -s[+|-]#[M|K] [-fvn] device", argv[0]) + + +/* reiserfs_resize.c */ +int expand_fs(void); + +/* fe.c */ +int resize_fs_online(char * devname, unsigned long block); diff -u -r --new-file linux/fs/reiserfs/utils/include/vfs.h v2.4.0-test8/linux/fs/reiserfs/utils/include/vfs.h --- linux/fs/reiserfs/utils/include/vfs.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/include/vfs.h Sat Aug 12 01:46:40 2000 @@ -0,0 +1,744 @@ +#include <sys/vfs.h> + +// +// ./include/asm-i386/page.h +// +#define BUG() do { \ + printf ("BUG at %s:%d!\n", __FILE__, __LINE__); \ + *(int *)0 = 0;\ +} while (0) + +#include <asm/atomic.h> + +// +// ./include/linux/tty.h +// +#define console_print printf + +// +// ./include/linux/kdev_t.h> +// +#include <linux/kdev_t.h> + +typedef unsigned long long kdev_t; +static inline kdev_t to_kdev_t(int dev) +{ + return MKDEV (MAJOR(dev), MINOR (dev)); +} +#define NODEV 0 + +// +// ./include/asm/atomic.h +// +// typedef struct { int counter; } atomic_t; + +#define ATOMIC_INIT(v) { (v) } +#define atomic_read(v) ((v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) +#define atomic_inc(v) ((v)->counter ++) +#define atomic_dec(v) ((v)->counter --) + + +// +// ./include/linux/list.h +// +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static __inline__ void __list_add(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/* + * Insert a new entry after the specified head.. 
+ */ +static __inline__ void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +static __inline__ void __list_del(struct list_head * prev, + struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +static __inline__ void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + + +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + + +// +// ./include/linux/wait.h +// +struct wait_queue { +}; + +typedef struct wait_queue wait_queue_head_t; + +#define DECLARE_WAIT_QUEUE_HEAD(name) \ + wait_queue_head_t name + +static inline void init_waitqueue_head(wait_queue_head_t *q) +{ +} + + + + +// +// ./include/linux/sched.h +// +#define SCHED_YIELD 0x10 + +struct fs_struct { + atomic_t count; + int umask; + struct dentry * root, * pwd; +}; + +struct task { + int pid; + int counter; + int fsuid; + int fsgid; + int need_resched; + struct fs_struct * fs; + unsigned long policy; + int lock_depth; +}; +#define schedule() do {} while (0); + +extern inline int fsuser(void) +{ + return 0; +} + +#define CURRENT_TIME (time(0)) + +extern void __wake_up(wait_queue_head_t *q, unsigned int mode); +#define wake_up(x) __wake_up(x, 0) +void sleep_on(wait_queue_head_t *q); +extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, + signed long timeout); +extern int in_group_p(gid_t); +extern inline int capable(int cap) +{ + return 0; +} + + +// +// ./include/linux/capability.h +// +#define CAP_FSETID 4 + + +// +// ./include/asm/current.h +// +extern struct task cur_task; +#define current (&cur_task) + + +// +// ./include/linux/dcache.h +// +struct qstr { + const unsigned char * name; + unsigned int len; + unsigned int hash; +}; + +struct dentry { + struct inode * d_inode; + struct qstr d_name; + unsigned char d_iname[4096]; +}; + +static __inline__ void d_add(struct dentry * entry, struct inode * inode) +{ + entry->d_inode = inode; +} + 
+static __inline__ int d_unhashed(struct dentry *dentry) +{ + return 0; +} + + +extern void d_instantiate(struct dentry *, struct inode *); +extern void d_delete(struct dentry *); +extern void d_move(struct dentry *, struct dentry *); +extern struct dentry * d_alloc_root(struct inode *); +extern void dput(struct dentry *dentry); + + + +// +// ./fs/namei.c +// +struct dentry * lookup_dentry(const char * name, struct dentry * base, + unsigned int lookup_flags); + + +// +// ./include/linux/mm.h +// +struct vm_area_struct { +}; + +typedef struct page { + struct address_space *mapping; + unsigned long index; + atomic_t count; + int flags; + char * address; + struct buffer_head * buffers; +} mem_map_t; + +#define PG_locked 0 +#define PG_error 1 +#define PG_referenced 2 +#define PG_uptodate 3 + +#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) +#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) +#define LockPage(page) set_bit(PG_locked, &(page)->flags) +#define UnlockPage(page) clear_bit(PG_locked, &(page)->flags) + +extern mem_map_t * mem_map; + +#define GFP_KERNEL 0 +#define GFP_ATOMIC 1 +#define GFP_BUFFER 2 + +extern void vmtruncate(struct inode * inode, unsigned long offset); + +// +// ./include/asm/page.h +// +#ifndef PAGE_SHIFT +#define PAGE_SHIFT 12 // for ALPHA =13 +#endif + +#ifndef PAGE_SIZE +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif + +#define PAGE_CACHE_SIZE PAGE_SIZE // ./include/linux/pagemap.h +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_CACHE_MASK PAGE_MASK +#define PAGE_CACHE_SHIFT PAGE_SHIFT + +#define MAP_NR(addr) + + +// +// ./linux/include/asm-i386/pgtable.h +// +#define page_address(page) page->address + + +// +// ./include/linux/pipe_fs_i.h +// +struct pipe_inode_info { +}; + + +// +// ./include/asm/semaphore.h +// +struct semaphore { +}; + +extern inline void down(struct semaphore * sem) +{ +} + +extern inline void up(struct semaphore * sem) +{ +} + + +// +// ./include/linux/fs.h +// +#undef BLOCK_SIZE 
+#define BLOCK_SIZE 1024 + +#define READ 0 +#define WRITE 1 + +extern void update_atime (struct inode *); +#define UPDATE_ATIME(inode) update_atime (inode) + +/* +#if !defined (MS_RDONLY) +#define MS_RDONLY 1 +#endif +*/ + +#define BH_Uptodate 0 +#define BH_Dirty 1 +#define BH_Lock 2 +#define BH_Req 3 +#define BH_Mapped 4 +#define BH_New 5 +#define BH__Protected 6 + +struct buffer_head { + unsigned long b_blocknr; + unsigned short b_size; + unsigned short b_list; + kdev_t b_dev; + atomic_t b_count; + unsigned long b_state; + unsigned long b_flushtime; + + struct buffer_head * b_next; + struct buffer_head * b_prev; + struct buffer_head * b_hash_next; + struct buffer_head * b_hash_prev; + char * b_data; + struct page * b_page; + void (*b_end_io)(struct buffer_head *bh, int uptodate); + struct buffer_head * b_this_page; +}; + +#include <asm/bitops.h> + +#define buffer_uptodate(bh) test_bit(BH_Uptodate, &(bh)->b_state) +#define buffer_dirty(bh) test_bit(BH_Dirty, &(bh)->b_state) +#define buffer_locked(bh) test_bit(BH_Lock, &(bh)->b_state) +#define buffer_mapped(bh) test_bit(BH_Mapped, &(bh)->b_state) +#define buffer_req(bh) test_bit(BH_Req, &(bh)->b_state) + +extern inline void mark_buffer_clean(struct buffer_head * bh) +{ + clear_bit(BH_Dirty, &(bh)->b_state); +} + +extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) +{ + set_bit(BH_Dirty, &(bh)->b_state); +} +#define __mark_buffer_dirty mark_buffer_dirty +extern void balance_dirty(kdev_t); + +#define ATTR_MODE 1 +#define ATTR_UID 2 +#define ATTR_GID 4 +#define ATTR_SIZE 8 +#define ATTR_ATIME 16 +#define ATTR_MTIME 32 +#define ATTR_CTIME 64 +#define ATTR_ATIME_SET 128 +#define ATTR_MTIME_SET 256 +#define ATTR_FORCE 512 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 1024 + +struct iattr { + unsigned int ia_valid; + umode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + time_t ia_atime; + time_t ia_mtime; + time_t ia_ctime; + unsigned int ia_attr_flags; +}; + +int 
inode_change_ok(struct inode *inode, struct iattr *attr); + +struct file; +struct address_space_operations { + int (*writepage) (struct file *, struct dentry *, struct page *); + int (*readpage)(struct dentry *, struct page *); + int (*sync_page)(struct page *); + int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); + int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ + int (*bmap)(struct address_space *, long); +}; + + +struct address_space { + //struct list_head pages; /* list of pages */ + //unsigned long nrpages; /* number of pages */ + struct address_space_operations *a_ops; /* methods */ + void *host; /* owner: inode, block_device */ + //struct vm_area_struct *i_mmap; /* list of mappings */ + //spinlock_t i_shared_lock; /* and spinlock protecting it */ +}; + + +#include "reiserfs_fs_i.h" + +struct inode { + struct list_head i_list; + unsigned long i_ino; + unsigned long i_generation; + atomic_t i_count; + kdev_t i_dev; + umode_t i_mode; + nlink_t i_nlink; + uid_t i_uid; + gid_t i_gid; + kdev_t i_rdev; + loff_t i_size; + time_t i_atime; + time_t i_mtime; + time_t i_ctime; + unsigned long i_blksize; + unsigned long i_blocks; + struct semaphore i_sem; + struct inode_operations * i_op; + struct file_operations * i_fop; + struct super_block * i_sb; + struct address_space * i_mapping; + struct address_space i_data; + unsigned long i_state; + unsigned int i_flags; + struct inode * i_next; + struct inode * i_prev; + wait_queue_head_t i_wait; + union { + struct reiserfs_inode_info reiserfs_i; + } u; +}; + + +#define is_bad_inode(inode) 0 + +struct file { + struct dentry * f_dentry; + unsigned int f_flags; + loff_t f_pos; + struct file_operations * f_op; + int f_ramax, f_raend, f_rawin, f_ralen; + int f_error; +}; + + +#include "reiserfs_fs_sb.h" + +extern struct list_head super_blocks; +#define sb_entry(list) list_entry((list), struct super_block, s_list) + 
+struct super_block { + struct list_head s_list; + kdev_t s_dev; + unsigned long s_blocksize; + int s_blocksize_bits; + int s_dirt; + int s_flags; + struct dentry * s_root; + struct super_operations * s_op; + union { + struct reiserfs_sb_info reiserfs_sb; + } u; +}; + + +struct file_lock { +}; + +struct poll_table_struct; +typedef int (*filldir_t)(void *, const char *, int, off_t, ino_t); + +struct file_operations { + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char *, size_t, loff_t *); + int (*readdir) (struct file *, void *, filldir_t); + unsigned int (*poll) (struct file *, struct poll_table_struct *); + int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + int (*mmap) (struct file *, struct vm_area_struct *); + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *); + int (*release) (struct inode *, struct file *); + int (*fsync) (struct file *, struct dentry *); + int (*fasync) (int, struct file *, int); + int (*check_media_change) (kdev_t dev); + int (*revalidate) (kdev_t dev); + int (*lock) (struct file *, int, struct file_lock *); +}; + +struct inode_operations { + int (*create) (struct inode *,struct dentry *,int); + struct dentry * (*lookup) (struct inode *,struct dentry *); + int (*link) (struct dentry *,struct inode *,struct dentry *); + int (*unlink) (struct inode *,struct dentry *); + int (*symlink) (struct inode *,struct dentry *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); + int (*rmdir) (struct inode *,struct dentry *); + int (*mknod) (struct inode *,struct dentry *,int,int); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); + int (*readlink) (struct dentry *, char *,int); + struct dentry * (*follow_link) (struct dentry *, struct dentry *, unsigned int); + int (*get_block) (struct inode *, long, struct buffer_head *, int); + + int (*readpage) 
(struct dentry *, struct page *); + int (*writepage) (struct dentry *, struct page *); + + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); + int (*revalidate) (struct dentry *); +}; + + +struct super_operations { + void (*read_inode) (struct inode *); + void (*read_inode2) (struct inode *, void *) ; + void (*write_inode) (struct inode *); + void (*dirty_inode) (struct inode *); + void (*put_inode) (struct inode *); + void (*delete_inode) (struct inode *); + int (*notify_change) (struct dentry *, struct iattr *); + void (*put_super) (struct super_block *); + void (*write_super) (struct super_block *); + int (*statfs) (struct super_block *, struct statfs *); + int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct super_block *); +}; + + +extern inline void mark_buffer_uptodate(struct buffer_head * bh, int on) +{ + if (on) + set_bit(BH_Uptodate, &bh->b_state); + else + clear_bit(BH_Uptodate, &bh->b_state); +} + +typedef struct { + size_t written; + size_t count; + char * buf; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); + + +extern char * kdevname(kdev_t); + +extern void clear_inode(struct inode *); +extern struct inode * get_empty_inode(void); +extern void insert_inode_hash(struct inode *); +int is_subdir (struct dentry *, struct dentry *); +extern void sync_inodes(kdev_t); + + +extern struct buffer_head * get_hash_table(kdev_t, int, int); +extern struct buffer_head * getblk(kdev_t, int, int); +extern void ll_rw_block(int, int, struct buffer_head * bh[]); +extern struct buffer_head * bread(kdev_t, int, int); +extern void brelse(struct buffer_head *); +extern inline void bforget(struct buffer_head *buf); +#define set_blocksize(x,y) do { } while (0) +extern int generic_file_mmap(struct file *, struct vm_area_struct *); +extern ssize_t generic_file_read(struct file *, char *, 
size_t, loff_t *); +extern void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor); +typedef int (*writepage_t)(struct dentry *, struct page *, unsigned long, unsigned long, const char *); +typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); +extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); +//extern int block_write_partial_page (struct dentry *, struct page *, unsigned long, unsigned long, const char *); +extern int block_write_full_page(struct page *page, get_block_t *get_block); +extern int block_sync_page(struct page *page); +extern int block_read_full_page(struct page * page, get_block_t * get_block); +extern int block_prepare_write(struct page *page, unsigned from, unsigned to, + get_block_t *get_block); +int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block); +int generic_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to); +extern void init_fifo(struct inode *); +extern void invalidate_buffers(kdev_t); + +extern int fsync_dev(kdev_t); + + +#define I_DIRTY 1 +#define I_LOCK 2 +static inline void mark_inode_dirty(struct inode *inode) +{ + inode->i_state |= I_DIRTY; +} + + + +extern struct inode *iget4(struct super_block *sb, unsigned long ino, void * notused, void * dirino); +extern void iput(struct inode *); + +extern struct inode_operations chrdev_inode_operations; +extern struct inode_operations blkdev_inode_operations; + +#define ERR_PTR(err) ((void *)((long)(err))) +extern int file_fsync(struct file *, struct dentry *); + + +// +// ./include/linux/locks.h +// +extern inline void wait_on_buffer(struct buffer_head * bh) +{ +} + +extern inline void unlock_buffer(struct buffer_head *bh) +{ + clear_bit(BH_Lock, &bh->b_state); + +} + +extern inline void lock_super(struct super_block * sb) +{ +} + +extern inline void unlock_super(struct super_block * sb) +{ +} + +#define __wait_on_buffer 
wait_on_buffer + +extern struct inode_operations page_symlink_inode_operations; + +extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); + +extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); +extern void make_bad_inode(struct inode *); + + +// +// ./include/linux/pagemap.h +// +#define page_cache_release(x) {freemem((x)->address);freemem(x);} +extern inline void wait_on_page(struct page * page) +{ +} +extern struct page * grab_cache_page (struct address_space *, unsigned long); + + +// +// ./include/linux/kernel.h +// +#define printk printf + +// +// ./include/linux/byteorder/generic.h +// +#include <asm/byteorder.h> + +#define le16_to_cpu __le16_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le64 __cpu_to_le64 + + +// +// ./include/linux/module.h +// +#define MOD_INC_USE_COUNT do { } while (0) +#define MOD_DEC_USE_COUNT do { } while (0) + + +// +// ./include/asm-i386/processor.h +// +extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + + +// +// ./include/asm-i386/uaccess.h +// +#define copy_to_user(to,from,n) memcpy(to,from,n) +#define put_user(x,ptr) (*(ptr) = x) + +// +// ./include/asm-i386/param.h +// +#define HZ 100 + + + + + +// +// ./include/linux/slab.h +// +#define kmalloc(x,y) getmem(x) +#define kfree(x) freemem(x) + + +// +// ./include/linux/blkdev.h +// +#define MAX_BLKDEV 255 +extern int * blksize_size[MAX_BLKDEV]; + + +// +// ./include/linux/tqueue.h +// +#define run_task_queue(tq) do {} while (0) +#define queue_task(a,b) do {} while (0) + + +// +// +// +extern inline void bwrite (struct buffer_head * bh) +{ + /* this does not lock buffer */ + ll_rw_block (WRITE, 1, &bh); + mark_buffer_clean (bh); +} + + +// +// to be deleted +// +#define mark_buffer_dirty_balance_dirty(x,y) do { } while (0) +#define reiserfs_brelse(bh) do { } while (0) 
+ +extern inline int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle * th, + struct inode * inode) +{ + return 0; +} +extern inline int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle * th, + struct inode * inode, struct buffer_head * bh) +{ + return 0; +} + +extern inline void reiserfs_check_lock_depth (char * caller) +{ +} + + + diff -u -r --new-file linux/fs/reiserfs/utils/lib/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/lib/Makefile --- linux/fs/reiserfs/utils/lib/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/lib/Makefile Sun May 14 23:15:04 2000 @@ -0,0 +1,26 @@ +OBJS = vfs.o misc.o reiserfs.o +#OBJS = misc.o io.o inode.o +#hlam.o + +MISC = $(TMPBINDIR)/libmisc.a + +all: $(MISC) + +.c.o: + $(CC) $(CFLAGS) $< + +$(MISC): $(OBJS) + ar -r $(MISC) $(OBJS) + +clean: + rm -f *.o $(MISC) *~ + +dep: + gcc -MM $(IDIRS) *.c > .depend + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + + + diff -u -r --new-file linux/fs/reiserfs/utils/lib/misc.c v2.4.0-test8/linux/fs/reiserfs/utils/lib/misc.c --- linux/fs/reiserfs/utils/lib/misc.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/lib/misc.c Sun May 14 23:37:12 2000 @@ -0,0 +1,395 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser + */ +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <mntent.h> +#include <errno.h> +#include <asm/types.h> +#include <sys/vfs.h> +#include <unistd.h> +#include <linux/unistd.h> + +#include "misc.h" +#include "vfs.h" + + + +#ifdef __alpha__ + +int set_bit (int nr, void * addr) +{ + __u8 * p, mask; + int retval; + + p = (__u8 *)addr; + p += nr >> 3; + mask = 1 << (nr & 0x7); + /*cli();*/ + retval = (mask & *p) != 0; + *p |= mask; + /*sti();*/ + return retval; +} + + +int clear_bit (int nr, void * addr) +{ + __u8 * p, mask; + int retval; + + p = (__u8 *)addr; + p += nr >> 3; + mask = 1 << (nr & 0x7); + /*cli();*/ + retval = (mask & *p) != 0; + *p &= ~mask; 
+ /*sti();*/ + return retval; +} + +int test_bit(int nr, const void * addr) +{ + __u8 * p, mask; + + p = (__u8 *)addr; + p += nr >> 3; + mask = 1 << (nr & 0x7); + return ((mask & *p) != 0); +} + +int find_first_zero_bit (const void *vaddr, unsigned size) +{ + const __u8 *p = vaddr, *addr = vaddr; + int res; + + if (!size) + return 0; + + size = (size >> 3) + ((size & 0x7) > 0); + while (*p++ == 255) { + if (--size == 0) + return (p - addr) << 3; + } + + --p; + for (res = 0; res < 8; res++) + if (!test_bit (res, p)) + break; + return (p - addr) * 8 + res; +} + + +int find_next_zero_bit (const void *vaddr, unsigned size, unsigned offset) +{ + const __u8 *addr = vaddr; + const __u8 *p = addr + (offset >> 3); + int bit = offset & 7, res; + + if (offset >= size) + return size; + + if (bit) { + /* Look for zero in first char */ + for (res = bit; res < 8; res++) + if (!test_bit (res, p)) + return (p - addr) * 8 + res; + p++; + } + /* No zero yet, search remaining full bytes for a zero */ + res = find_first_zero_bit (p, size - 8 * (p - addr)); + return (p - addr) * 8 + res; +} +#endif /* __alpha__ */ + + + +/*int test_and_set_bit (int nr, void * addr) +{ + int oldbit = test_bit (nr, addr); + set_bit (nr, addr); + return oldbit; +} + + +int test_and_clear_bit (int nr, void * addr) +{ + int oldbit = test_bit (nr, addr); + clear_bit (nr, addr); + return oldbit; +}*/ + + +void die (char * fmt, ...) 
+{ + static char buf[1024]; + va_list args; + + va_start (args, fmt); + vsprintf (buf, fmt, args); + va_end (args); + + fprintf (stderr, "\n%s\n\n\n", buf); + exit (-1); +} + + + +#define MEM_BEGIN "membegi" +#define MEM_END "mem_end" +#define MEM_FREED "__free_" +#define CONTROL_SIZE (strlen (MEM_BEGIN) + 1 + sizeof (int) + strlen (MEM_END) + 1) + + +static int get_mem_size (const char * p) +{ + const char * begin; + + begin = p - strlen (MEM_BEGIN) - 1 - sizeof (int); + return *(int *)(begin + strlen (MEM_BEGIN) + 1); +} + + +static void checkmem (const char * p, int size) +{ + const char * begin; + const char * end; + + begin = p - strlen (MEM_BEGIN) - 1 - sizeof (int); + if (strcmp (begin, MEM_BEGIN)) + die ("checkmem: memory corrupted - invalid head sign"); + + if (*(int *)(begin + strlen (MEM_BEGIN) + 1) != size) + die ("checkmem: memory corrupted - invalid size"); + + end = begin + size + CONTROL_SIZE - strlen (MEM_END) - 1; + if (strcmp (end, MEM_END)) + die ("checkmem: memory corrupted - invalid end sign"); +} + + + +void * getmem (int size) +{ + char * p; + char * mem; + + p = (char *)malloc (CONTROL_SIZE + size); + if (!p) + die ("getmem: no more memory (%d)", size); + + strcpy (p, MEM_BEGIN); + p += strlen (MEM_BEGIN) + 1; + *(int *)p = size; + p += sizeof (int); + mem = p; + memset (mem, 0, size); + p += size; + strcpy (p, MEM_END); + + checkmem (mem, size); + + return mem; +} + + +void * expandmem (void * vp, int size, int by) +{ + int allocated; + char * mem, * p = vp; + int expand_by = by; + + if (p) { + checkmem (p, size); + allocated = CONTROL_SIZE + size; + p -= (strlen (MEM_BEGIN) + 1 + sizeof (int)); + } else { + allocated = 0; + /* add control bytes to the new allocated area */ + expand_by += CONTROL_SIZE; + } + p = realloc (p, allocated + expand_by); + if (!p) + die ("expandmem: no more memory (%d)", size); + if (!vp) { + strcpy (p, MEM_BEGIN); + } + mem = p + strlen (MEM_BEGIN) + 1 + sizeof (int); + + *(int *)(p + strlen (MEM_BEGIN) + 1) = 
size + by; + /* fill new allocated area by 0s */ + memset (mem + size, 0, by); + strcpy (mem + size + by, MEM_END); + + checkmem (mem, size + by); + + return mem; +} + + +void freemem (const void * vp) +{ + int size; + + if (!vp) + return; + size = get_mem_size (vp); + checkmem (vp, size); + + vp -= (strlen (MEM_BEGIN) + 1 + sizeof (int)); + free ((void *)vp); +} + + +int is_mounted (char * device_name) +{ + FILE *f; + struct mntent *mnt; + + if ((f = setmntent (MOUNTED, "r")) == NULL) + return 0; + + while ((mnt = getmntent (f)) != NULL) + if (strcmp (device_name, mnt->mnt_fsname) == 0) + return 1; + endmntent (f); + + return 0; +} + + + + +static char * strs[] = +{"0%",".",".",".",".","20%",".",".",".",".","40%",".",".",".",".","60%",".",".",".",".","80%",".",".",".",".","100%"}; + +static char progress_to_be[1024]; +static char current_progress[1024]; + +static void str_to_be (char * buf, int prosents) +{ + int i; + prosents -= prosents % 4; + buf[0] = 0; + for (i = 0; i <= prosents / 4; i ++) + strcat (buf, strs[i]); +} + + +void print_how_far (__u32 * passed, __u32 total) +{ + int n; + + if (*passed == 0) + current_progress[0] = 0; + + if (*passed >= total) { + fprintf/*die*/ (stderr, "\nprint_how_far: total %u has been reached already. 
cur=%u\n", total, ++(*passed)); + return; + } + + (*passed) ++; + n = ((double)((double)(*passed) / (double)total) * (double)100); + + str_to_be (progress_to_be, n); + + if (strlen (current_progress) != strlen (progress_to_be)) { + fprintf (stderr, "%s", progress_to_be + strlen (current_progress)); + } + + strcat (current_progress, progress_to_be + strlen (current_progress)); + + + fflush (stdout); +} + + + +_syscall5 (int, _llseek, uint, fd, ulong, hi, ulong, lo, + loff_t *, res, uint, wh); + +loff_t reiserfs_llseek (unsigned int fd, loff_t offset, unsigned int origin) +{ + loff_t retval, result; + + retval = _llseek (fd, ((unsigned long long) offset) >> 32, + ((unsigned long long) offset) & 0xffffffff, + &result, origin); + return (retval != 0 ? (loff_t)-1 : result); + +} + + +int block_read (int dev, int block, int blocksize, char * data) +{ + loff_t pos; + ssize_t rd; + + pos = (loff_t)block * blocksize; + + if (reiserfs_llseek (dev, pos, SEEK_SET) == (loff_t)-1) + die ("block_read: lseek failed: %s", strerror (errno)); + + if ((rd = read (dev, data, blocksize)) != blocksize) { + if (rd == -1) + die ("block_read: write failed: %s", strerror (errno)); + else + die ("block_read: %d bytes written (should be %d)", rd, blocksize); + } + return 0; +} + +int block_write (int dev, int block, int blocksize, char * data) +{ + loff_t pos = 0; + ssize_t wr; + + pos = (loff_t)block * blocksize; + if (reiserfs_llseek (dev, pos, SEEK_SET) == (loff_t)-1) + die ("block_write: lseek failed: %s", strerror (errno)); + + if ((wr = write (dev, data, blocksize)) != blocksize) { + if (wr == -1) + die ("block_write: write failed: %s", strerror (errno)); + else + die ("block_write: %d bytes written (should be %d)", wr, blocksize); + } + return 0; +} + +/* + * For the benefit of those who are trying to port Linux to another + * architecture, here are some C-language equivalents. You should + * recode these in the native assmebly language, if at all possible. 
+ * + * C language equivalents written by Theodore Ts'o, 9/26/92. + * Modified by Pete A. Zaitcev 7/14/95 to be portable to big endian + * systems, as well as non-32 bit systems. + * + * taken from ext2fs_progs, was ext2fs_set_bit + */ + +int reiserfs_progs_set_le_bit(int nr,void * addr) +{ + int mask, retval; + unsigned char *local_addr = (unsigned char *) addr; + + local_addr += nr >> 3; + mask = 1 << (nr & 0x07); + retval = (mask & *local_addr) != 0; + *local_addr |= mask; + return retval; +} + +/* taken from ext2fs progs, was ext2fs_test_bit */ +int reiserfs_progs_test_le_bit(int nr, const void * addr) +{ + int mask; + const unsigned char *local_addr = (const unsigned char *) addr; + + local_addr += nr >> 3; + mask = 1 << (nr & 0x07); + return ((mask & *local_addr) != 0); +} diff -u -r --new-file linux/fs/reiserfs/utils/lib/reiserfs.c v2.4.0-test8/linux/fs/reiserfs/utils/lib/reiserfs.c --- linux/fs/reiserfs/utils/lib/reiserfs.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/lib/reiserfs.c Thu Aug 10 19:09:05 2000 @@ -0,0 +1,385 @@ +/* + * Copyright 1996-2000 Hans Reiser + */ + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <asm/types.h> +#include <sys/vfs.h> +#include <string.h> +#include <asm/byteorder.h> +#include <time.h> + +#include "misc.h" +#include "vfs.h" +#include "reiserfs_fs.h" +#include "reiserfs.h" + + +#define reiserfs_sb(buf) ((struct reiserfs_super_block *)(buf)) + +static int reiserfs_magic_string (char * buf) +{ + return is_reiserfs_magic_string (reiserfs_sb (buf)); +} + + + +/* returns 1 if buf looks like a leaf node, 0 otherwise */ +#if 0 // in stree.c now +static int is_leaf (char * buf, int blocksize) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + nr = le16_to_cpu (blkh->blk_nr_item); + + if (nr != DISK_LEAF_NODE_LEVEL) + return 0; + + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / 
(IH_SIZE + MIN_ITEM_LEN))) + /* item number is too big or too small */ + return 0; + + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) + /* free space does not match to calculated amount of use space */ + return 0; + + // FIXME: it is_leaf will hit performance too much - free_space is trustable enough + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i ++, ih ++) { + if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) + return 0; + if (ih_item_len (ih) < 1 || ih_itme_len (ih) > MAX_ITEM_LEN (blocksize)) + return 0; + if (prev_location - ih_location (ih) != ih_item_len (ih)) + return 0; + prev_location = ih_location (ih); + } + + /* contents of buf looks like leaf so far */ + return 1; +} + + +/* returns 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf, int blocksize) +{ + struct block_head * blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) <= DISK_LEAF_NODE_LEVEL || + le16_to_cpu (blkh->blk_level) > MAX_HEIGHT) + /* this level is not possible for internal nodes */ + return 0; + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) + /* for internal which is not root we might check min number of keys */ + return 0; + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) + return 0; + + // more check can be written here + + return 1; +} +#endif // is_leaf and is_internal are in stree.c + +static int is_leaf (char * buf, int blocksize) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + if 
(le16_to_cpu (blkh->blk_level) != DISK_LEAF_NODE_LEVEL) + return 0; + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) + /* item number is too big or too small */ + return 0; + + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) + /* free space does not match to calculated amount of use space */ + return 0; + + // FIXME: it is_leaf will hit performance too much - we may have + // return 1 here + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i ++, ih ++) { + if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) + return 0; + if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) + return 0; + if (prev_location - ih_location (ih) != ih_item_len (ih)) + return 0; + prev_location = ih_location (ih); + } + + // one may imagine much more checks + return 1; +} + + +/* returns 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf, int blocksize) +{ + struct block_head * blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) <= DISK_LEAF_NODE_LEVEL || + le16_to_cpu (blkh->blk_level) > MAX_HEIGHT) + /* this level is not possible for internal nodes */ + return 0; + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) + /* for internal which is not root we might check min number of keys */ + return 0; + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) + return 0; + + // one may imagine much more checks + return 1; +} + +/* sometimes unfomatted node looks like formatted, if we check only + block_header. 
This is the reason, why it is so complicated. We + believe only when free space and item locations are ok + */ +int not_formatted_node (char * buf, int blocksize) +{ + struct reiserfs_journal_desc * desc; + + if (is_leaf (buf, blocksize)) + return 0; + + if (is_internal (buf, blocksize)) + return 0; + + /* super block? */ + if (reiserfs_magic_string (buf)) + return 0; + + /* journal descriptor block? */ + desc = (struct reiserfs_journal_desc *)buf; + if (!memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) + return 0; + + /* contents of buf does not look like reiserfs metadata. Bitmaps + are possible here */ + return 1; +} + + +/* is this block bitmap block or block from journal or skipped area or + super block? This works for both journal format only yet */ +int not_data_block (struct super_block * s, b_blocknr_t block) +{ + int i; + + if (block < reiserfs_get_journal_block (s) + JOURNAL_BLOCK_COUNT + 1) + return 1; + for (i = 0; i < SB_BMAP_NR (s); i ++) + if (block == SB_AP_BITMAP (s)[i]->b_blocknr) + return 1; + return 0; +} + + + + +////////////////////////////////////////////////////////// +// +// in reiserfs version 0 (undistributed bitmap) +// +static int get_journal_old_start_must (struct reiserfs_super_block * s) +{ + return 3 + s->s_bmap_nr; +} + + +// +// in reiserfs version 1 (distributed bitmap) journal starts at 18-th +// +static int get_journal_start_must (struct reiserfs_super_block * s) +{ + return REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 2; +} + + +int get_journal_start (struct super_block * s) +{ + return s->u.reiserfs_sb.s_rs->s_journal_block; +} + + +int get_journal_size (struct super_block * s) +{ + return s->u.reiserfs_sb.s_rs->s_orig_journal_size; +} + + +int is_desc_block (struct buffer_head * bh) +{ + struct reiserfs_journal_desc * desc = bh_desc (bh); + + if (!memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) + return 1; + return 0; +} + + +int does_desc_match_commit (struct reiserfs_journal_desc * desc, + struct reiserfs_journal_commit * 
commit) +{ + if (commit->j_trans_id != desc->j_trans_id || commit->j_len != desc->j_len || + commit->j_len > JOURNAL_TRANS_MAX || commit->j_len <= 0 ) { + return 1 ; + } + return 0 ; +} + + + +/* ./lib/inode.c */extern struct super_operations reiserfs_sops; + +// +// FIXME: 4k only now ! +// + +int uread_super_block (struct super_block * s) +{ + struct buffer_head * bh; + + + bh = bread (s->s_dev, (REISERFS_DISK_OFFSET_IN_BYTES / 4096), 4096); + if (!bh) + goto not_found; + + if (reiserfs_magic_string (bh->b_data) && + reiserfs_sb (bh->b_data)->s_journal_block == get_journal_start_must (reiserfs_sb (bh->b_data))) + /* new super block found and correct journal start */ + goto found; + + /* new super block is not the correct one */ + brelse (bh); + + bh = bread (s->s_dev, 2, 4096); + if (!bh) + goto not_found; + + if (reiserfs_magic_string (bh->b_data) && + reiserfs_sb (bh->b_data)->s_journal_block == get_journal_old_start_must (reiserfs_sb (bh->b_data))) + goto found; + + brelse (bh); + + not_found: + printf ("uread_super_block: neither new nor old reiserfs format found on dev %s\n", + kdevname (s->s_dev)); + return 1; + + found: + + s->s_blocksize = __le16_to_cpu (reiserfs_sb (bh->b_data)->s_blocksize); + s->s_blocksize_bits = 0; + while ((1 << s->s_blocksize_bits) != s->s_blocksize) + s->s_blocksize_bits ++; + + SB_BUFFER_WITH_SB (s) = bh; + SB_DISK_SUPER_BLOCK (s) = reiserfs_sb (bh->b_data); + s->s_op = &reiserfs_sops; + return 0; +} + + +static int new_format (struct super_block * s) +{ + return (reiserfs_get_journal_block (s) == get_journal_start_must (SB_DISK_SUPER_BLOCK (s))); +} + + + +int uread_bitmaps (struct super_block * s) +{ + int i, bmp ; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + + + SB_AP_BITMAP (s) = getmem (sizeof (struct buffer_head *) * __le16_to_cpu (rs->s_bmap_nr)); + if (!SB_AP_BITMAP (s)) { + printf ("read_bitmaps: malloc failed\n"); + return 1; + } + + bmp = SB_BUFFER_WITH_SB (s)->b_blocknr + 1; + + for (i = 0; i < 
__le16_to_cpu (rs->s_bmap_nr); i ++) { + SB_AP_BITMAP (s)[i] = bread (s->s_dev, bmp, s->s_blocksize); + if (!SB_AP_BITMAP (s)[i]) { + printf ("read_bitmaps: bread failed\n"); + return 1; + } + if (new_format (s)) + bmp = (i + 1) * (s->s_blocksize * 8); + else + bmp ++; + } + + return 0; +} + + + +/* prepare stat data of new directory */ +void make_dir_stat_data (struct key * dir_key, struct item_head * ih, + struct stat_data * sd) +{ + /* insert stat data item */ + copy_key (&(ih->ih_key), dir_key); + ih->ih_item_len = SD_SIZE; + ih_version (ih) = ITEM_VERSION_2; + // ih->u.ih_free_space = MAX_US_INT; + // ih->ih_reserved = 0; +/* mark_item_unaccessed (ih);*/ + + sd->sd_mode = S_IFDIR + 0755; + sd->sd_nlink = 0; + sd->sd_uid = 0; + sd->sd_gid = 0; + sd->sd_size = EMPTY_DIR_SIZE; + sd->sd_atime = sd->sd_ctime = sd->sd_mtime = time (NULL); + sd->u.sd_rdev = 0; +} + + diff -u -r --new-file linux/fs/reiserfs/utils/lib/version.c v2.4.0-test8/linux/fs/reiserfs/utils/lib/version.c --- linux/fs/reiserfs/utils/lib/version.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/lib/version.c Thu Sep 21 12:29:06 2000 @@ -0,0 +1,7 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +char *reiserfs_get_version_string(void) { + return "ReiserFS version 3.6.17" ; +} diff -u -r --new-file linux/fs/reiserfs/utils/lib/vfs.c v2.4.0-test8/linux/fs/reiserfs/utils/lib/vfs.c --- linux/fs/reiserfs/utils/lib/vfs.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/lib/vfs.c Sat Aug 12 01:46:40 2000 @@ -0,0 +1,1102 @@ +#include <stdio.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <asm/types.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <assert.h> + + +#include "vfs.h" +#include "misc.h" +#include "reiserfs_fs.h" + +// +// ./kernel/sched.h +// +struct fs_struct fs = {{0}, 022, 0, 0}; +struct task cur_task = {0, 0, 0, 0, 0, &fs}; + +void __wake_up(wait_queue_head_t 
*q, unsigned int mode) +{ + return; +} + +void sleep_on(wait_queue_head_t *q) +{ + return; +} + +long interruptible_sleep_on_timeout(wait_queue_head_t *q, + signed long timeout) +{ + return 0; +} + +// +// ./kernel/sys.c +// +int in_group_p(gid_t grp) +{ + return 0; +} + + +// +// ./mm/page_alloc.c +// +unsigned long get_free_page (void) +{ + return (unsigned long)getmem (PAGE_SIZE); +} + + +// +// ./mm/filemap.c +// +struct page *grab_cache_page(struct address_space *mapping, unsigned long index) +{ + return get_free_page (); +} + + +int generic_file_mmap(struct file * file, struct vm_area_struct * vma) +{ + return 0; +} + + +ssize_t generic_file_write(struct file * file, const char * buf, size_t count, + loff_t * ppos) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + struct address_space *mapping = inode->i_mapping; + // unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; + loff_t pos; + struct page *page, *cached_page; + unsigned long written; + long status = 0; + int err; + + cached_page = NULL; + down(&inode->i_sem); + + pos = *ppos; + err = -EINVAL; + err = file->f_error; + if (err) { + file->f_error = 0; + goto out; + } + + written = 0; + + // if (file->f_flags & O_APPEND) + //pos = inode->i_size; + + while (count) { + unsigned long bytes, index, offset; + char *kaddr; + + /* + * Try to find the page in the cache. If it isn't there, + * allocate a free page. + */ + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = -ENOMEM; /* we'll assign it later anyway */ + //page = __grab_cache_page(mapping, index, &cached_page); + page = grab_cache_page(mapping, index); + if (!page) + break; + + /* We have exclusive IO access to the page.. 
*/ + // if (!PageLocked(page)) { + // PAGE_BUG(page); + //} + + status = mapping->a_ops->prepare_write(file, page, offset, offset+bytes); + if (status) + goto unlock; + kaddr = (char*)page_address(page); + memcpy/*copy_from_user*/(kaddr+offset, buf, bytes); + //if (status) + //goto fail_write; + status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); + if (!status) + status = bytes; + + if (status >= 0) { + written += status; + count -= status; + pos += status; + buf += status; + if (pos > inode->i_size) + inode->i_size = pos; + } +unlock: + /* Mark it unlocked again and drop the page.. */ + UnlockPage(page); + page_cache_release(page); + + if (status < 0) + break; + } + *ppos = pos; + + //if (cached_page) + //page_cache_free(cached_page); + + err = written ? written : status; +out: + up(&inode->i_sem); + return err; +//fail_write: + //status = -EFAULT; + // ClearPageUptodate(page); + // kunmap(page); + goto unlock; +} + + +// +// ../mm/memiry.c +// +void vmtruncate(struct inode * inode, unsigned long offset) +{ +} + + +// +// ./fs/devices.c +// +struct inode_operations chrdev_inode_operations = {0,}; +struct inode_operations blkdev_inode_operations = {0,}; + +char * kdevname(kdev_t dev) +{ + static char name[20]; + struct stat st; + + if (fstat (dev, &st) == -1) + die ("kdevname: fstat failed: %s", strerror (errno)); + sprintf (name, "[%Lx:%Lx]", MAJOR (st.st_rdev), MINOR (st.st_rdev)); + return name; +} + + +// +// ./fs/fifo.c +// +void init_fifo(struct inode * inode) +{ +} + + +// +// ./fs/attr.c +// +int inode_change_ok(struct inode *inode, struct iattr *attr) +{ + return 0; +} + + +// +// ./fs/inode.c +// +LIST_HEAD(inode_in_use); + +#define NR_INODES 1000 + +struct inode * first_inode; +int inodes = 0; + +struct inode * find_inode (unsigned long ino) +{ + struct inode * inode; + + inode = first_inode; + if (inode == 0) + return 0; + + while (1) { + if (inode->i_ino == ino) { + atomic_inc (&inode->i_count); + return inode; + } + inode = 
inode->i_next; + if (inode == first_inode) + break; + } + return 0; +} + + +static void clean_inode(struct inode *inode) +{ + static struct address_space_operations empty_aops = {}; + static struct inode_operations empty_iops = {}; + static struct file_operations empty_fops = {}; + memset(&inode->u, 0, sizeof(inode->u)); + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; + inode->i_nlink = 1; + inode->i_size = 0; + inode->i_data.a_ops = &empty_aops; + inode->i_data.host = (void*)inode; + inode->i_mapping = &inode->i_data; +} + +struct inode * get_empty_inode (void) +{ + struct inode * inode, * prev, * next; + + if (inodes == NR_INODES) { + first_inode->i_sb->s_op->write_inode (first_inode); + + /* set all but i_next and i_prev to 0 */ + next = first_inode->i_next; + prev = first_inode->i_prev; + memset (first_inode, 0, sizeof (struct inode)); + first_inode->i_next = next; + first_inode->i_prev = prev; + + /* move to end of list */ + first_inode = first_inode->i_next; + return first_inode->i_prev; + } + /* allocate new inode */ + inode = getmem (sizeof (struct inode)); + if (!inode) + return 0; + + /* add to end of list */ + if (first_inode) { + inode->i_prev = first_inode->i_prev; + inode->i_next = first_inode; + first_inode->i_prev->i_next = inode; + first_inode->i_prev = inode; + } else { + first_inode = inode->i_next = inode->i_prev = inode; + } + atomic_set (&inode->i_count, 1); + clean_inode (inode); + return inode; +} + + +void insert_inode_hash (struct inode * inode) +{ +} + +static struct inode * get_new_inode (struct super_block *sb, unsigned long ino, + unsigned long dirino) +{ + struct inode * inode; + + inode = get_empty_inode (); + if (inode) { + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino = ino; + sb->s_op->read_inode2 (inode, (void *)dirino); + return inode; + } + return 0; +} + + + +struct inode *iget4 (struct super_block *sb, unsigned long ino, void * notused, + void * dirino) +{ + struct inode * inode; + + inode = 
find_inode (ino); + if (inode) + return inode; + return get_new_inode (sb, ino, (unsigned long)dirino); + return 0; +} + +void iput (struct inode *inode) +{ + if (inode) { + if (atomic_read (&inode->i_count) == 0) + die ("iput: can not free free inode"); + + if (inode->i_op && inode->i_fop && + inode->i_fop->release) + inode->i_fop->release (inode, 0); + if (inode->i_sb->s_op->put_inode) + inode->i_sb->s_op->put_inode (inode); + atomic_dec (&inode->i_count); + + if (inode->i_nlink == 0) { + inode->i_sb->s_op->delete_inode (inode); + return; + } + if (inode->i_state & I_DIRTY) { + inode->i_sb->s_op->write_inode (inode); + inode->i_state &= ~I_DIRTY; + } + } +} + + +void clear_inode (struct inode *inode) +{ +} + + +void sync_inodes(kdev_t dev) +{ +} + + +void update_atime (struct inode *inode) +{ +} + +void __wait_on_inode(struct inode * inode) +{ +} + +// +// ./fs/bad_ionde.c +// +void make_bad_inode(struct inode * inode) +{ +} + + + +// +// ./fs/super.c +// +LIST_HEAD(super_blocks); + + +// +// ./fs/read_write.c +// +ssize_t generic_read_dir(struct file *filp, char *buf, size_t siz, loff_t *ppos) +{ + return -EINVAL; +} + +// +// ./fs/dcache.c +// +void d_instantiate(struct dentry *entry, struct inode * inode) +{ + entry->d_inode = inode; +} + +void d_delete(struct dentry * dentry) +{ +} + +void d_move(struct dentry * d1, struct dentry * d2) +{ +} + +int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) +{ + return 0; +} + +struct dentry * d_alloc_root(struct inode * inode) +{ + return 0; +} + +void dput(struct dentry *dentry) +{ +} + + +// +// ./fs/namei.c +// +struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags) +{ + return 0; +} + +struct inode_operations page_symlink_inode_operations = { +}; + +// +// ./fs/buffer.c +// +#define MAX_NR_BUFFERS 4000 +static int g_nr_buffers; + +#define NR_HASH_QUEUES 20 +static struct buffer_head * g_a_hash_queues [NR_HASH_QUEUES]; +static struct buffer_head * 
g_buffer_list_head; +static struct buffer_head * g_buffer_heads; + +static void insert_into_hash_queue (struct buffer_head * bh) +{ + int index = bh->b_blocknr % NR_HASH_QUEUES; + + if (bh->b_hash_prev || bh->b_hash_next) + die ("insert_into_hash_queue: hash queue corrupted"); + + if (g_a_hash_queues[index]) { + g_a_hash_queues[index]->b_hash_prev = bh; + bh->b_hash_next = g_a_hash_queues[index]; + } + g_a_hash_queues[index] = bh; + +/* check_hash_queues ();*/ +} + + +static void remove_from_hash_queue (struct buffer_head * bh) +{ + if (bh->b_hash_next == 0 && bh->b_hash_prev == 0 && bh != g_a_hash_queues[bh->b_blocknr % NR_HASH_QUEUES]) + /* (b_dev == 0) ? */ + return; + + if (bh == g_a_hash_queues[bh->b_blocknr % NR_HASH_QUEUES]) { + if (bh->b_hash_prev != 0) + die ("remove_from_hash_queue: hash queue corrupted"); + g_a_hash_queues[bh->b_blocknr % NR_HASH_QUEUES] = bh->b_hash_next; + } + if (bh->b_hash_next) + bh->b_hash_next->b_hash_prev = bh->b_hash_prev; + + if (bh->b_hash_prev) + bh->b_hash_prev->b_hash_next = bh->b_hash_next; + + bh->b_hash_prev = bh->b_hash_next = 0; + +/* check_hash_queues ();*/ +} + + +static void put_buffer_list_end (struct buffer_head * bh) +{ + struct buffer_head * last = 0; + + if (bh->b_prev || bh->b_next) + die ("put_buffer_list_end: buffer list corrupted"); + + if (g_buffer_list_head == 0) { + bh->b_next = bh; + bh->b_prev = bh; + g_buffer_list_head = bh; + } else { + last = g_buffer_list_head->b_prev; + + bh->b_next = last->b_next; + bh->b_prev = last; + last->b_next->b_prev = bh; + last->b_next = bh; + } +} + + +static void remove_from_buffer_list (struct buffer_head * bh) +{ + if (bh == bh->b_next) { + g_buffer_list_head = 0; + } else { + bh->b_prev->b_next = bh->b_next; + bh->b_next->b_prev = bh->b_prev; + if (bh == g_buffer_list_head) + g_buffer_list_head = bh->b_next; + } + + bh->b_next = bh->b_prev = 0; +} + + +static void put_buffer_list_head (struct buffer_head * bh) +{ + put_buffer_list_end (bh); + g_buffer_list_head = 
bh; +} + + +#define GROW_BUFFERS__NEW_BUFERS_PER_CALL 10 +/* creates number of new buffers and insert them into head of buffer list + */ +static int grow_buffers (int size) +{ + int i; + struct buffer_head * bh, * tmp; + + if (g_nr_buffers + GROW_BUFFERS__NEW_BUFERS_PER_CALL > MAX_NR_BUFFERS) + return 0; + + /* get memory for array of buffer heads */ + bh = (struct buffer_head *)getmem (GROW_BUFFERS__NEW_BUFERS_PER_CALL * sizeof (struct buffer_head) + sizeof (struct buffer_head *)); + if (g_buffer_heads == 0) + g_buffer_heads = bh; + else { + /* link new array to the end of array list */ + tmp = g_buffer_heads; + while (*(struct buffer_head **)(tmp + GROW_BUFFERS__NEW_BUFERS_PER_CALL) != 0) + tmp = *(struct buffer_head **)(tmp + GROW_BUFFERS__NEW_BUFERS_PER_CALL); + *(struct buffer_head **)(tmp + GROW_BUFFERS__NEW_BUFERS_PER_CALL) = bh; + } + + for (i = 0; i < GROW_BUFFERS__NEW_BUFERS_PER_CALL; i ++) { + + tmp = bh + i; + memset (tmp, 0, sizeof (struct buffer_head)); + tmp->b_data = getmem (size); + if (tmp->b_data == 0) + die ("grow_buffers: no memory for new buffer data"); + tmp->b_dev = 0; + tmp->b_size = size; + put_buffer_list_head (tmp); + + g_nr_buffers ++; + } + return GROW_BUFFERS__NEW_BUFERS_PER_CALL; +} + + +static struct buffer_head * find_buffer (int dev, int block, + int size) +{ + struct buffer_head * next; + + next = g_a_hash_queues[block % NR_HASH_QUEUES]; + for (;;) { + struct buffer_head *tmp = next; + if (!next) + break; + next = tmp->b_hash_next; + if (tmp->b_blocknr != block || tmp->b_size != size || tmp->b_dev != dev) + continue; + next = tmp; + break; + } + return next; +} + + + + +struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +{ + struct buffer_head * bh; + + bh = find_buffer (dev, block, size); + if (bh) { + atomic_inc(&bh->b_count); + } + return bh; +} + + +/* if sync != 0, then do not free memory used by buffer cache */ +static void sync_buffers (int size, int sync) +{ + struct buffer_head * next = 
g_buffer_list_head; + int written = 0; + + for (;;) { + if (!next) + die ("sync_buffers: buffer list is corrupted"); + + if ((!size || next->b_size == size) && buffer_dirty (next) && + buffer_uptodate (next) && atomic_read (&next->b_count) == 0) { + written ++; + bwrite (next); + if (size && written == 10) + /* when size is not 0, write only 10 blocks */ + return; + } + + next = next->b_next; + if (next == g_buffer_list_head) + break; + } + + if (!sync) { + int i = 0; + next = g_buffer_list_head; + for (;;) { + if (!next) + die ("sync_buffers: buffer list is corrupted"); + if (atomic_read (&next->b_count) != 0) + die ("sync_buffers: not free buffer (%d, %d, %d)", + next->b_blocknr, next->b_size, atomic_read (&next->b_count)); + + if (buffer_dirty (next) && buffer_uptodate (next)) + die ("sync_buffers: dirty buffer found"); + + freemem (next->b_data); + i ++; + next = next->b_next; + if (next == g_buffer_list_head) + break; + } + if (i != g_nr_buffers) + die ("sync_buffers: found %d buffers, must be %d", i, g_nr_buffers); + + /* free buffer heads */ + while ((next = g_buffer_heads)) { + g_buffer_heads = *(struct buffer_head **)(next + GROW_BUFFERS__NEW_BUFERS_PER_CALL); + freemem (next); + } + } +} + +void invalidate_buffers(kdev_t dev) +{ +} + +void show_buffers (int dev, int size) +{ + int all = 0; + int dirty = 0; + int in_use = 0; /* count != 0 */ + int free = 0; + struct buffer_head * next = g_buffer_list_head; + + for (;;) { + if (!next) + die ("show_buffers: buffer list is corrupted"); + if (next->b_dev == dev && next->b_size == size) { + all ++; + if (atomic_read (&next->b_count) != 0) { + in_use ++; + } + if (buffer_dirty (next)) { + dirty ++; + } + if (!buffer_dirty (next) && atomic_read (&next->b_count) == 0) { + free ++; + } + } + next = next->b_next; + if (next == g_buffer_list_head) + break; + } + + printf ("show_buffers (dev %d, size %d): free %d, count != 0 %d, dirty %d, all %d\n", + dev, size, free, in_use, dirty, all); +} + +static struct 
buffer_head * get_free_buffer (int size) +{ + struct buffer_head * next; + int growed = 0; + + repeat: + next = g_buffer_list_head; + if (!next) + goto grow; + + for (;;) { + if (!next) + die ("get_free_buffer: buffer list is corrupted"); + if (atomic_read (&next->b_count) == 0 && !buffer_dirty (next) + && next->b_size == size) { + remove_from_hash_queue (next); + remove_from_buffer_list (next); + put_buffer_list_end (next); + return next; + } + next = next->b_next; + if (next == g_buffer_list_head) + break; + } + + grow: + if (grow_buffers (size) == 0) { + /* this write dirty buffers and they become reusable if their + b_count == 0 */ + sync_buffers (size, 10); + } + if (growed == 0) { + growed = 1; + goto repeat; + } + + return 0; +} + + +struct buffer_head * getblk (kdev_t dev, int block, int size) +{ + struct buffer_head * bh; + + bh = find_buffer (dev, block, size); + if (bh) { + if (!buffer_uptodate (bh)) + die ("getblk: buffer must be uptodate"); + atomic_inc (&bh->b_count); + return bh; + } + + bh = get_free_buffer (size); + if (!bh) + die ("getblk: no free buffers"); + atomic_set (&bh->b_count, 1); + bh->b_dev = dev; + bh->b_size = size; + bh->b_blocknr = block; + memset (bh->b_data, 0, size); + clear_bit(BH_Dirty, &bh->b_state); + clear_bit(BH_Uptodate, &bh->b_state); + + insert_into_hash_queue (bh); + + return bh; +} + + +struct buffer_head * bread (kdev_t dev, int block, int size) +{ + struct buffer_head * bh; + + bh = getblk (dev, block, size); + if (buffer_uptodate (bh)) + return bh; + + ll_rw_block (READ, 1, &bh); + mark_buffer_uptodate (bh, 1); + return bh; +} + + +void brelse (struct buffer_head * bh) +{ + if (bh == 0) + return; + if (atomic_read (&bh->b_count) == 0) + die ("brelse: can not free a free buffer"); + + atomic_dec (&bh->b_count); +} + + +void bforget (struct buffer_head * bh) +{ + if (bh) { + brelse (bh); + remove_from_hash_queue (bh); + remove_from_buffer_list (bh); + put_buffer_list_head (bh); + } +} + +void balance_dirty(kdev_t dev) 
+{ +} + +int fsync_dev (kdev_t dev) +{ + sync_buffers (0, dev); + return 0; +} + + +int file_fsync(struct file * file, struct dentry * dentry) +{ + return 0; +} + + +//int block_write_partial_page (struct dentry * dentry, struct page *page, unsigned long offset, unsigned long bytes, const char * buf) +//{ +// return 0; +//} + +int block_write_full_page (struct page *page, get_block_t *get_block) +{ + return 0; +} +int block_sync_page (struct page *page) +{ + return 0; +} + + +int block_read_full_page (struct page * page, get_block_t * get_block) +{ + struct inode * inode = (struct inode*)page->mapping->host; + struct buffer_head * bh; + + if (inode->i_sb->s_blocksize != PAGE_SIZE) + die ("block_read_full_page: block size must be 4k"); + bh = get_free_buffer (inode->i_sb->s_blocksize); + if (!bh) + return -ENOMEM; + atomic_set (&(bh->b_count), 1); + get_block(inode, page->index, bh, 0); + if (!buffer_mapped (bh)) { + memset(bh->b_data, 0, bh->b_size); + set_bit(BH_Uptodate, &bh->b_state); + } else { + ll_rw_block (READ, 1, &bh); + } + memcpy (page->address, bh->b_data, bh->b_size); + SetPageUptodate (page); + UnlockPage (page); + brelse (bh); + return 0; +} + +int block_prepare_write(struct page *page, unsigned from, unsigned to, + get_block_t *get_block) +{ + struct buffer_head bh = {0,}; + struct buffer_head * pbh = &bh; + struct inode *inode = (struct inode*)page->mapping->host; + int block; + int nr; + + bh.b_data = getmem (inode->i_sb->s_blocksize); + bh.b_size = inode->i_sb->s_blocksize; + block = page->index << PAGE_CACHE_SHIFT; + + get_block (inode, block, &bh, 1); + nr = bh.b_size; + if ((to - from + 1) < nr) + nr = to - from + 1; + if (nr != bh.b_size) { + ll_rw_block (READ, 1, &pbh); + wait_on_buffer (&bh); + } + + return 0; +} + + +int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +{ + struct buffer_head tmp; + struct inode *inode = (struct inode*)mapping->host; + tmp.b_state = 0; + tmp.b_blocknr = 0; + 
get_block(inode, block, &tmp, 0); + return tmp.b_blocknr; +} + +int generic_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return 0; +} + +int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx) +{ + return 0; +} + +// +// ./mm/filemap.c +// +void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor) +{ + struct inode * inode = filp->f_dentry->d_inode; + struct address_space *mapping = inode->i_mapping; + struct page * page; + unsigned long index, offset; + + index = *ppos >> PAGE_SHIFT; + offset = *ppos & ~PAGE_CACHE_MASK; + + while (1) { + unsigned long end_index, nr; + + end_index = inode->i_size >> PAGE_SHIFT; + if (index > end_index) + break; + nr = PAGE_SIZE; + if (index == end_index) { + nr = inode->i_size & ~PAGE_CACHE_MASK; + if (nr <= offset) + break; + } + + nr = nr - offset; + + /* allocate page to read in */ + page = getmem (sizeof (struct page)); + page->address = getmem (PAGE_SIZE); + page->index = index; + page->mapping = mapping; + if (mapping->a_ops->readpage(filp->f_dentry, page) != 0) + //if (inode->i_op->readpage (filp->f_dentry, page) != 0) + die ("do_generic_file_read: readpage failed"); + + /* actor */ + if (nr > desc->count) + nr = desc->count; + + if (actor) { + actor (desc, page, offset, nr); + } else { + memcpy (desc->buf, page_address (page) + offset, nr); + desc->buf += nr; + desc->count -= nr; + desc->written += nr; + } +/* memcpy (buf, page_address (page) + offset, nr);*/ + offset += nr; +/* buf += nr; + count -= nr;*/ +/* retval += nr;*/ + + index += offset >> PAGE_SHIFT; + offset &= ~PAGE_CACHE_MASK; + page_cache_release (page); + if (!desc->count) + break; + } + + *ppos = ((loff_t) index << PAGE_SHIFT) + offset; + return; +} + + +ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) +{ + read_descriptor_t desc; + + desc.buf = buf; + desc.written = 0; + desc.count = count; + 
desc.error = 0; + do_generic_file_read (filp, ppos, &desc, 0); + return desc.written; +} + + +// +// ?????????????? +// +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + return 0; +} + + + +// +// ./drivers/block/ll_rw_blk.c +// +int * blksize_size[MAX_BLKDEV] = { NULL, NULL, }; +void ll_rw_block(int rw, int nr, struct buffer_head * pbh[]) +{ + int i; + + for (i = 0; i < nr; i ++) { + if (rw == WRITE) { + if (!buffer_dirty (pbh[i]) || !buffer_uptodate (pbh[i])) + die ("ll_rw_block: do not write not uptodate or clean buffer"); + block_write (pbh[i]->b_dev, pbh[i]->b_blocknr, pbh[i]->b_size, pbh[i]->b_data); + } else { + block_read (pbh[i]->b_dev, pbh[i]->b_blocknr, pbh[i]->b_size, pbh[i]->b_data); + } + } +} + + + + +// +// util versions of reiserfs kernel functions +// + +//void wait_buffer_until_released (struct buffer_head * bh) +//{ +// assert (0); +//} + + +//int reiserfs_sync_file (struct file * file, struct dentry * dentry) +//{ +// return 0; +//} + +void reiserfs_check_buffers (kdev_t dev) +{ +} + +DECLARE_TASK_QUEUE(reiserfs_end_io_tq) ; + +// +// used by utils who works with reiserfs directly (reiserfsck, mkreiserfs, etc) +// + + + +// +// to be deleted +// +#if 0 +int is_buffer_suspected_recipient (struct super_block * s, struct buffer_head * bh) +{ + return 0; +} + +struct tree_balance; +void preserve_shifted (struct tree_balance * a, struct buffer_head ** b, + struct buffer_head * c, int d, struct buffer_head * e) +{ +} + +inline void unpreserve (struct super_block * s, struct buffer_head * bh) +{ +} + +inline void mark_suspected_recipient (struct super_block * sb, struct buffer_head * bh) +{ +} + +inline void unmark_suspected_recipient (struct super_block * sb, struct buffer_head * bh) +{ +} + +void add_to_preserve (unsigned long blocknr, struct super_block * sb) +{ +} + +int maybe_free_preserve_list (struct super_block * sb, int x) +{ + return 0; +} + +int get_space_from_preserve_list (struct super_block * s) +{ + return 0; 
+} + + +void preserve_invalidate (struct tree_balance * tb, struct buffer_head * bh, struct buffer_head * bh1) +{ +} + +inline void mark_buffer_unwritten (struct buffer_head * bh) +{ +} + +int ready_preserve_list (struct tree_balance * tb, struct buffer_head * bh) +{ + return 0; +} + +// +// delete the above +// + +#endif diff -u -r --new-file linux/fs/reiserfs/utils/mkreiserfs/HASHINFO v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/HASHINFO --- linux/fs/reiserfs/utils/mkreiserfs/HASHINFO Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/HASHINFO Mon Sep 11 05:21:52 2000 @@ -0,0 +1,56 @@ + Reiserfs file system uses a hash function plus a generation counter + in determining the key used in directory searching. + + We do this for compatibility with NFS, which needs to be able to + use a 32 (v2) or 64 bit (v3) integer to specify where it last + read from in a directory (the NFS cookie) because it + stupidly assumes that directories are implemented as files + with byte offsets and directories are never shrunk. It seems + V4 will be fixing that. This hash function determines the order + of insertions. That can have dramatic impact on performance for + large directories because it can cause a random I/O per filename + created. + + If you want certainty of avoiding hash collisions, which will cause + a -EHASHCOLLISION error if you have more collisions on a + given hash value than the maximum generation counter (128), you + should specify -h tea when running mkreiserfs. + + The Rupasov Hash makes an attempt to preserve much of the order + that will be present in alphabetically or numerically consecutive + names while adding just enough randomness for it to work as a hash. + Note that if it gets the order reversed, the LRU algorithm + will still work better than if it fully randomizes..... + + TEA_HASH allows you to have fewer collisions, and this means that you + can have directories that are larger. 
As a practical matter, users + never report r5 is insufficient for applications that are not deliberately + designed to make r5 inadequate on current hardware. r5 is faster + for large directories, 30x faster for ones that exceed cache capacity. + Use r5, teahash is almost, but not quite always, the wrong answer. + This hashing feature exists to cover over bad design of telldir and NFS + cookies which assume that a byte offset into a directory has meaning and + allocate a number of bytes appropriate for use of a byte offset not a + filename for tracking position of a partial directory read in directories. + + RUPASOV_HASH: + Invented by Yuri Rupasov while studying the problems of creating + directories too large to fit into RAM. Never slower than + CRYPTO_SECURE_HASH, and for some applications involving directories + too large for RAM it can be as much as 30 times faster. For normal + size directories it makes reiserfs work with the same speed or + just a bit faster than tea hash function. + + Rupasov_hash is obsolete, please use r5_hash. + + R5_HASH: + Invented by Yuri Rupasov to solve collisions problem of rupasov_hash + in case of names with long identical tails. R5_hash gives the same or + better speed, so please use it to work with huge dirs. + + The default hash is 'r5' hash. 
+ + examples : + # mkreiserfs /dev/xxxx -h tea + # mkreiserfs /dev/xxxx -h r5 + diff -u -r --new-file linux/fs/reiserfs/utils/mkreiserfs/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/Makefile --- linux/fs/reiserfs/utils/mkreiserfs/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/Makefile Sun May 14 23:15:04 2000 @@ -0,0 +1,47 @@ +VPATH = ../bin +vpath %.c $(REISERFS_KERNEL_SOURCE) $(REISERFS_LIB) + +# files from utils's lib directory needed for mkreiserfs +LIB_C = misc.c vfs.c version.c +LIB_OBJS = misc.o vfs.o version.o + +MKFS_OBJS = mkreiserfs.o $(LIB_OBJS) + +MKFS = $(TMPBINDIR)/mkreiserfs + +all: $(MKFS) + +.c.o: + $(CC) $(CFLAGS) $< + +$(MKFS): $(MKFS_OBJS) + $(CC) $(LFLAGS) -o $(MKFS) $(MKFS_OBJS) + +clean: + rm -f *.o $(MKFS) *~ + +dep: + gcc -MM $(IDIRS) *.c > .depend + for i in $(LIB_C); do gcc -MM $(IDIRS) ../lib/$$i >> .depend ; done + +install: + cp -f $(MKFS) $(SBIN) + if [ -d $(MANDIR) ] ; then cp mkreiserfs.8 $(MANDIR) ; gzip -9 -f $(MANDIR)/mkreiserfs.8 ; fi + + +uninstall: + rm -f $(MANDIR)/mkreiserfs.8.gz $(SBIN)/mkreiserfs + + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + + + + + + + + + diff -u -r --new-file linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.8 v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.8 --- linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.8 Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.8 Sun Aug 20 02:36:17 2000 @@ -0,0 +1,58 @@ +.\" -*- nroff -*- +.\" Copyright 1996, 1997 Hans Reiser. +.\" +.TH MKREISERFS 8 "February 1999" "Reiserfs utilities" +.SH NAME +mkreiserfs \- create a Linux Reiserfs file system +.SH SYNOPSIS +.B mkreiserfs +[ +.B \-b +.I block-size-in-1k-units +] +[ +.B \-h +.I hash-name +] +.I device +[ +.I size-in-blocks +] +.SH DESCRIPTION +.B mkreiserfs +utility is used to create a Linux Reiserfs file system on a device +(usually a disk partition). 
+.br +.I device +is the special file corresponding to the device (e.g /dev/hdXX for +IDE disk partition or /dev/sdXX for SCSI disk partition). +.br +.I size-in-blocks +is the number of blocks on the device. If omitted, it will be +determined by +.B mkreiserfs +automatically. +.SH OPTIONS +.TP +.I -b block-size-in-1k-units +Specify the size of blocks in 1024b units. In current version +.B mkreiserfs +accepts only values of 1, 2, or 4 (i.e. size of blocks can be 1024b, +2048b, or 4096b correspondingly) +.TP +.I -h hash-name +is the name of hash function used to sort names. If omitted, "r5" hash +will be used as default. You can set also "-h tea". +.\" .SH AUTHOR +.\" This version of +.\" .B mkreiserfs +.\" has been written by Hans Reiser <reiser@idiom.com>. +.SH BUGS +Not known yet. +Please, report bugs to Hans Reiser <reiser@idiom.com>. +.SH AVAILABILITY +.B mkreiserfs +sources are available for anonymous ftp from namesys.botik.ru +in /pub/linux+reiserfs/reiserfs-utils.tgz +.SH SEE ALSO +.BR reiserfsck (8) diff -u -r --new-file linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.c v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.c --- linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/mkreiserfs/mkreiserfs.c Sun Aug 20 02:36:17 2000 @@ -0,0 +1,554 @@ +/* + * Copyright 1996-1999 Hans Reiser + */ + +/* mkreiserfs is very simple. It supports only 4K blocks. It skips + first 64K of device, and then write super block, first bitmap + block, journal, root block. All this must fit into number of blocks + pointed by one bitmap. 
*/ +/* TODO: bad block specifying */ + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <asm/types.h> +#include <sys/vfs.h> + +#include "misc.h" +#include "vfs.h" +#include "reiserfs_fs.h" + + +#define print_usage_and_exit() die ("Usage: %s [ -f ] [-b block-size] [-h tea|r5] device [blocks-count]\n\n", argv[0]) + + +#define DEFAULT_BLOCKSIZE 4096 +#define MIN_BLOCK_AMOUNT 10000 + + +struct buffer_head * g_sb_bh; +struct buffer_head * g_rb_bh; + + +int g_block_size = DEFAULT_BLOCKSIZE; +int g_journal_size = JOURNAL_BLOCK_COUNT; +int g_hash = DEFAULT_HASH; +int g_block_number; +int g_blocks_on_device; +struct buffer_head ** g_bmap; + + +/* Given a file descriptor and an offset, check whether the offset is + a valid offset for the file - return 0 if it isn't valid or 1 if it + is */ +int valid_offset( int fd, loff_t offset ) +{ + char ch; + + if (reiserfs_llseek (fd, offset, 0) < 0) + return 0; + + if (read (fd, &ch, 1) < 1) + return 0; + + return 1; +} + + +/* calculates number of blocks on device */ +unsigned long count_blocks (char * filename, int blocksize) +{ + loff_t high, low; + int fd; + + fd = open (filename, O_RDONLY); + if (fd < 0) + die ("count_blocks: open failed (%s)", strerror (errno)); + + +#if defined (BLKGETSIZE) + { + long size; + + if (ioctl (fd, BLKGETSIZE, &size) >= 0) { + close (fd); + return size / (blocksize / 512); + } + } +#endif + + low = 0; + for( high = 1; valid_offset (fd, high); high *= 2 ) + low = high; + while (low < high - 1) { + const loff_t mid = ( low + high ) / 2; + + if (valid_offset (fd, mid)) + low = mid; + else + high = mid; + } + valid_offset (fd, 0); + close (fd); + + return (low + 1) / (blocksize); +} + + +static void mark_block_used (b_blocknr_t block, struct buffer_head ** bmap) +{ + int i, j; + + i = block / (g_block_size * 
8); + j = block % (g_block_size * 8); + reiserfs_progs_set_le_bit (j, bmap[i]->b_data); + mark_buffer_dirty (bmap[i], 0); +} + + +static void make_super_block (int dev) +{ + struct reiserfs_super_block * sb; + __u32 * oids; + b_blocknr_t blocknr; + int i; + + if (SB_SIZE > g_block_size || g_block_size > REISERFS_DISK_OFFSET_IN_BYTES) + die ("mkreiserfs: blocksize (%d) too small or too big", g_block_size); + + blocknr = REISERFS_DISK_OFFSET_IN_BYTES / g_block_size; + + /* get buffer for super block */ + g_sb_bh = getblk (dev, blocknr, g_block_size); + sb = (struct reiserfs_super_block *)g_sb_bh->b_data; + + memset(g_sb_bh->b_data, 0, g_block_size) ; + + sb->s_block_count = cpu_to_le32 (g_block_number); /* size of filesystem in blocks */ + sb->s_bmap_nr = cpu_to_le16 ((g_block_number + (g_block_size * 8 - 1)) / (g_block_size * 8));/* number of needed bitmaps */ + + /* used blocks are: blocknr-s skipped, super block, bitmap blocks, journal, root block */ + sb->s_free_blocks = cpu_to_le32 (g_block_number - blocknr - + 1/* super block */ - g_journal_size - 1/* journal head */ + - le16_to_cpu (sb->s_bmap_nr) - 1/* root */); + /* root block is after skipped (blocknr), super block (1), first + bitmap(1), journal (g_journal_size), journal header(1) */ + sb->s_root_block = cpu_to_le32 (blocknr + 1 + 1 + g_journal_size + 1); + + sb->s_version = cpu_to_le16 (REISERFS_VERSION_2); + sb->s_blocksize = cpu_to_le16 (g_block_size); + sb->s_state = cpu_to_le16 (REISERFS_VALID_FS); + sb->s_tree_height = cpu_to_le16 (2); + sb->s_journal_dev = cpu_to_le32 (0) ; + sb->s_orig_journal_size = cpu_to_le32 (g_journal_size) ; + sb->s_journal_trans_max = cpu_to_le32 (0) ; + sb->s_journal_block_count = cpu_to_le32 (0) ; + sb->s_journal_max_batch = cpu_to_le32 (0) ; + sb->s_journal_max_commit_age = cpu_to_le32 (0) ; + sb->s_journal_max_trans_age = cpu_to_le32 (0) ; + memcpy (sb->s_magic, REISER2FS_SUPER_MAGIC_STRING, sizeof (REISER2FS_SUPER_MAGIC_STRING)); + sb->s_hash_function_code = cpu_to_le32 
(g_hash); + + /* initialize object map */ + oids = (__u32 *)(sb + 1); + + oids[0] = cpu_to_le32 (1); + oids[1] = cpu_to_le32 (REISERFS_ROOT_OBJECTID + 1); /* objectids > REISERFS_ROOT_OBJECTID are free */ + sb->s_oid_cursize = cpu_to_le16 (2); + sb->s_oid_maxsize = cpu_to_le16 ((g_block_size - SB_SIZE) / sizeof (__u32) / 2 * 2); + + mark_buffer_dirty (g_sb_bh, 0); + mark_buffer_uptodate (g_sb_bh, 1); + + /* allocate bitmap blocks */ + blocknr ++; + g_bmap = (struct buffer_head **)getmem (le16_to_cpu (sb->s_bmap_nr) * sizeof (struct buffer_head *)); + for (i = 0; i < sb->s_bmap_nr; i ++) { + g_bmap[i] = getblk (dev, blocknr, sb->s_blocksize); + mark_buffer_dirty (g_bmap[i], 0); + mark_buffer_uptodate (g_bmap[i], 1); + blocknr = (i + 1) * sb->s_blocksize * 8; + } + + /* mark skipped blocks busy */ + for (i = 0; i < g_sb_bh->b_blocknr; i ++) + mark_block_used (i, g_bmap); + + /* mark super block */ + mark_block_used (i++, g_bmap); + + /* first bitmap block */ + mark_block_used (i++, g_bmap); + + /* first cautious bitmap block */ /* DELETE */ +/* mark_block_used (i++, g_bmap);*/ + + sb->s_journal_block = cpu_to_le32 (i); + for (; i <= g_sb_bh->b_blocknr + 2 + g_journal_size; i ++) + mark_block_used (i, g_bmap); + + /* make sure, that all journal blocks pointed by first bitmap */ + if (i >= g_block_size * 8) + die ("mkreiserfs: Journal too big"); + /* make sure, that we still have some amount (100 blocks) of free space */ + if (i + 100 > g_block_number) + die ("mkreiserfs: size of file system is too small"); + /* mark root block as used */ + mark_block_used (i, g_bmap); + + + /* set up other bitmap blocks */ + for (i = 1; i < le16_to_cpu (sb->s_bmap_nr); i ++) { + mark_block_used (i * g_block_size * 8, g_bmap); + /* cautious bitmap block */ /* DELETE */ +/* mark_block_used (i * g_block_size * 8 + 1, g_bmap);*/ + } + + /* unused space of last bitmap is filled by 1s */ + for (i = sb->s_bmap_nr * sb->s_blocksize * 8; --i >= sb->s_block_count; ) + mark_block_used (i, 
g_bmap); + + return; +} + + +void zero_journal_blocks(int dev, int start, int len) { + int i ; + struct buffer_head *bh ; + int done = 0; + + printf ("Initializing journal - "); fflush (stdout); + + for (i = 0 ; i < len ; i++) { + print_how_far (&done, len); + bh = getblk (dev, start + i, g_block_size) ; + memset(bh->b_data, 0, g_block_size) ; + mark_buffer_dirty(bh,0) ; + mark_buffer_uptodate(bh, 1) ; + bwrite (bh); + brelse(bh) ; + } + printf ("\n"); fflush (stdout); +} + + +/* form the root block of the tree (the block head, the item head, the + root directory) */ +void make_root_block () +{ + struct reiserfs_super_block * sb = (struct reiserfs_super_block *)g_sb_bh->b_data; + char * rb; + struct block_head * blkh; + struct item_head * ih; + struct stat_data * sd; + struct key maxkey = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff}, }}; + + + /* get root block */ + g_rb_bh = getblk (g_sb_bh->b_dev, le32_to_cpu (sb->s_root_block), g_sb_bh->b_size); + rb = g_rb_bh->b_data; + + /* first item is stat data item of root directory */ + ih = (struct item_head *)(rb + BLKH_SIZE); + ih_version (ih) = cpu_to_le16 (ITEM_VERSION_2); + ih->ih_key.k_dir_id = cpu_to_le32 (REISERFS_ROOT_PARENT_OBJECTID); + ih->ih_key.k_objectid = cpu_to_le32 (REISERFS_ROOT_OBJECTID); + set_le_ih_k_offset (ih, SD_OFFSET); + set_le_ih_k_type (ih, TYPE_STAT_DATA); + ih->ih_item_len = cpu_to_le16 (SD_SIZE); + ih->ih_item_location = cpu_to_le16 (le16_to_cpu (sb->s_blocksize) + - SD_SIZE); + set_ih_free_space (ih, MAX_US_INT); + + /* fill NEW stat data */ + sd = (struct stat_data *)(rb + ih->ih_item_location); + sd->sd_mode = cpu_to_le16 (S_IFDIR + 0755); + sd->sd_nlink = cpu_to_le16 (3); + sd->sd_uid = cpu_to_le32 (getuid ()); + sd->sd_gid = cpu_to_le32 (getgid ()); + sd->sd_size = cpu_to_le64 (EMPTY_DIR_SIZE); + sd->sd_atime = sd->sd_ctime = sd->sd_mtime = cpu_to_le32 (time (NULL)); + sd->u.sd_rdev = cpu_to_le32 (0); + + + /* second item is root directory item, containing "." and ".." 
*/ + ih ++; + ih_version (ih) = cpu_to_le16 (ITEM_VERSION_1); + ih->ih_key.k_dir_id = cpu_to_le32 (REISERFS_ROOT_PARENT_OBJECTID); + ih->ih_key.k_objectid = cpu_to_le32 (REISERFS_ROOT_OBJECTID); + set_le_ih_k_offset (ih, DOT_OFFSET); + set_le_ih_k_type (ih, TYPE_DIRENTRY); + ih->ih_item_len = cpu_to_le16 (DEH_SIZE * 2 + ROUND_UP (strlen (".")) + + ROUND_UP (strlen (".."))); + ih->ih_item_location = cpu_to_le16 (le16_to_cpu ((ih-1)->ih_item_location) - + le16_to_cpu (ih->ih_item_len)); + ih->u.ih_entry_count = cpu_to_le32 (2); + + /* compose item itself */ + make_empty_dir_item (rb + ih->ih_item_location, + REISERFS_ROOT_PARENT_OBJECTID, REISERFS_ROOT_OBJECTID, + 0, REISERFS_ROOT_PARENT_OBJECTID); + + /* block head */ + blkh = (struct block_head *)rb; + blkh->blk_level = cpu_to_le16 (DISK_LEAF_NODE_LEVEL); + blkh->blk_nr_item = cpu_to_le16 (2); + blkh->blk_free_space = cpu_to_le16 (le32_to_cpu (sb->s_blocksize) - BLKH_SIZE - + 2 * IH_SIZE - SD_SIZE - le16_to_cpu (ih->ih_item_len)); + // kept just for compatibility only (not used) + blkh->blk_right_delim_key = maxkey; + + mark_buffer_dirty (g_rb_bh, 0); + mark_buffer_uptodate (g_rb_bh, 1); + return; +} + + +/* + * write the super block, the bitmap blocks and the root of the tree + */ +static void make_new_filesystem (void) +{ + struct reiserfs_super_block * sb = (struct reiserfs_super_block *)g_sb_bh->b_data; + int i; + + printf ("journal size %d (from %d)\n", + sb->s_orig_journal_size, sb->s_journal_block); + + zero_journal_blocks (g_sb_bh->b_dev, sb->s_journal_block, + sb->s_orig_journal_size + 1); + + /* bitmap blocks */ + for (i = 0; i < sb->s_bmap_nr; i ++) { + bwrite (g_bmap[i]); + brelse (g_bmap[i]); + } + + /* root block */ + bwrite (g_rb_bh); + brelse (g_rb_bh); + + /* super block */ + bwrite (g_sb_bh); + brelse (g_sb_bh); +} + + +void report (void) +{ + struct reiserfs_super_block * sb = (struct reiserfs_super_block *)g_sb_bh->b_data; + unsigned int i; + + printf ("Block size %d bytes\n", le16_to_cpu 
(sb->s_blocksize)); + printf ("Block count %d\n", g_block_number); + printf ("Used blocks %d\n", g_block_number - le32_to_cpu (sb->s_free_blocks)); + printf ("\tJournal - %d blocks (%d-%d), journal header is in block %d\n", + g_journal_size, + le32_to_cpu (sb->s_journal_block), + le32_to_cpu (sb->s_journal_block) + g_journal_size - 1, + le32_to_cpu (sb->s_journal_block) + g_journal_size) ; + printf ("\tBitmaps: "); + for (i = 0; i < le16_to_cpu (sb->s_bmap_nr); i ++) + printf ("%ld%s", g_bmap[i]->b_blocknr, + (i == le16_to_cpu (sb->s_bmap_nr) - 1) ? "\n" : ", "); + printf ("\tRoot block %d\n", le32_to_cpu (sb->s_root_block)); + printf ("Hash function \"%s\"\n", g_hash == TEA_HASH ? "tea" : + ((g_hash == YURA_HASH) ? "rupasov" : "r5")); + fflush (stdout); +} + + +static void discard_old_filesystems (int dev) +{ + struct buffer_head * bh; + + /* discard vfat/msdos (0-th 512 byte sector) and ext2 (1-st 1024 + byte block) */ + bh = getblk (dev, 0, 2048); + memset (bh->b_data, 0, 2048); + mark_buffer_uptodate (bh, 1); + mark_buffer_dirty (bh, 1); + bwrite (bh); + brelse (bh); + + /* discard super block of reiserfs of old format (8-th 1024 byte block) */ + bh = getblk (dev, 8, 1024); + memset (bh->b_data, 0, 1024); + mark_buffer_uptodate (bh, 1); + mark_buffer_dirty (bh, 1); + bwrite (bh); + brelse (bh); + +} + + +static void set_block_size (char * str) +{ + char * tmp; + + g_block_size = (int) strtol (str, &tmp, 0); + if (*tmp) + die ("mkreiserfs: wrong block size specified: %s\n", str); + + if (g_block_size != 4) + die ("mkreiserfs: %dk is wrong block size", g_block_size); + + g_block_size *= 1024; +} + + +/* journal must fit into number of blocks pointed by first bitmap */ +static void set_journal_size (char * str) +{ + char * tmp; + + g_journal_size = (int) strtol (str, &tmp, 0); + if (*tmp) + die ("mkreiserfs: wrong journal size specified: %s\n", str); + + if (g_journal_size < JOURNAL_BLOCK_COUNT / 2 || + g_journal_size > JOURNAL_BLOCK_COUNT * 2) + die ("mkreiserfs: 
wrong journal size specified: %s\n", str); +} + + +/* journal size is specified already */ +static void set_block_number (char * str) +{ + char * tmp; + + g_block_number = (int) strtol (str, &tmp, 0); + if (*tmp) + die ("mkreiserfs: wrong block number specified: %s\n", str); + + if (g_block_number > g_blocks_on_device) + die ("mkreiserfs: there are not so many blocks on device", g_block_number); + +} + + +static void set_hash_function (char * str) +{ + if (!strcmp (str, "tea")) + g_hash = TEA_HASH; + else if (!strcmp (str, "rupasov")) + g_hash = YURA_HASH; + else if (!strcmp (str, "r5")) + g_hash = R5_HASH; + else + printf ("mkreiserfs: wrong hash type specified. Using default\n"); +} + +int main (int argc, char **argv) +{ + int dev; + int force = 0; + struct stat statbuf; + char * device_name; + char c; + + printf ("\n\n<----------- MKREISERFSv2 ----------->\n\n"); + +#if 1 + clear_inode (0); + if (0) { + /* ???? */ + getblk (0,0,0); + iput (0); + } +#endif + + if (argc < 2) + print_usage_and_exit (); + + while ( ( c = getopt( argc, argv, "fb:c:h:" ) ) != EOF ) + switch( c ) { + case 'f' : /* force if file is not a block device */ + force = 1; + break; + + case 'b' : /* -b n - where n is 1,2 or 4 */ + set_block_size (optarg); + break; + + case 'c': + set_journal_size (optarg); + break; + + case 'h': + set_hash_function (optarg); + break; + + default : + print_usage_and_exit (); + } + device_name = argv [optind]; + if (is_mounted (device_name)) + die ("mkreiserfs: '%s' contains a mounted file system\n", device_name); + + + /* get block number for file system */ + g_blocks_on_device = count_blocks (device_name, g_block_size); + g_block_number = g_blocks_on_device; + if (optind == argc - 2) + set_block_number (argv[optind + 1]); + else if (optind != argc - 1) + print_usage_and_exit (); + + g_block_number = g_block_number / 8 * 8; + + if (g_block_number < MIN_BLOCK_AMOUNT) + die ("mkreiserfs: block number %d (truncated to n*8) is too low", + g_block_number); + + if 
(stat (device_name, &statbuf) < 0) + die ("mkreiserfs: unable to stat %s", device_name); + + if (!S_ISBLK (statbuf.st_mode) && ( force == 1 )) + die ("mkreiserfs: '%s (%o)' is not a block device", device_name, statbuf.st_mode); + else /* Ignore any 'full' fixed disk devices */ + if ( statbuf.st_rdev == 0x0300 || statbuf.st_rdev == 0x0340 + || statbuf.st_rdev == 0x0400 || statbuf.st_rdev == 0x0410 + || statbuf.st_rdev == 0x0420 || statbuf.st_rdev == 0x0430 + || statbuf.st_rdev == 0x0d00 || statbuf.st_rdev == 0x0d40 ) + /* ???? */ + die ("mkreiserfs: will not try to make filesystem on '%s'", device_name); + dev = open (device_name, O_RDWR); + if (dev == -1) + die ("mkreiserfs: can not open '%s': %s", device_name, strerror (errno)); + + /* these fill buffers (super block, first bitmap, root block) with + reiserfs structures */ + make_super_block (dev); + make_root_block (); + + report (); + + printf ("ATTENTION: ALL DATA WILL BE LOST ON '%s'! (y/n)", device_name); + c = getchar (); + if (c != 'y' && c != 'Y') + die ("mkreiserfs: Disk was not formatted"); + + discard_old_filesystems (dev); + make_new_filesystem (); + + + + /* 0 means: write all buffers and free memory */ + fsync_dev (0); + + printf ("Syncing.."); fflush (stdout); + sync (); + printf ("done.\n\n"); + + return 0; + +} diff -u -r --new-file linux/fs/reiserfs/utils/resize_reiserfs/Makefile v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/Makefile --- linux/fs/reiserfs/utils/resize_reiserfs/Makefile Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/Makefile Sun May 14 23:15:04 2000 @@ -0,0 +1,40 @@ + +VPATH = ../bin + +OBJS = resize_reiserfs.o fe.o + +RESIZER = $(TMPBINDIR)/resize_reiserfs + +all: $(RESIZER) + +.c.o: + $(CC) $(CFLAGS) -Wall -g $< + +$(RESIZER): $(OBJS) libmisc.a + $(CC) $(LFLAGS) -o $(RESIZER) $(OBJS) -lmisc + +clean: + rm -f *.o $(RESIZER) *~ + +dep: + gcc -MM $(IDIRS) *.c > .depend + +install: + cp -f $(RESIZER) $(SBIN) + +uninstall: + rm -f 
$(SBIN)/resize_reiserfs + + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + + + + + + + + + diff -u -r --new-file linux/fs/reiserfs/utils/resize_reiserfs/fe.c v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/fe.c --- linux/fs/reiserfs/utils/resize_reiserfs/fe.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/fe.c Sun May 14 23:15:05 2000 @@ -0,0 +1,44 @@ +/* + * Copyright 1999 Hans Reiser, see README file for licensing details. + */ + +#include <sys/mount.h> +#include <sys/types.h> +#include <asm/types.h> +#include <errno.h> +#include <stdio.h> +#include <mntent.h> +#include <string.h> +#include "misc.h" +#include "resize.h" + +/* the front-end for kernel on-line resizer */ +int resize_fs_online(char * devname, unsigned long blocks) +{ + static char buf[40]; + FILE * f; + struct mntent * mnt; + + if ((f = setmntent (MOUNTED, "r")) == NULL) + goto fail; + + while ((mnt = getmntent (f)) != NULL) + if(strcmp(devname, mnt->mnt_fsname) == 0) { + + if (strcmp(mnt->mnt_type,"reiserfs")) + die ("resize_reiserfs: can\'t resize fs other than reiserfs\n"); + + sprintf(buf,"resize=%lu", blocks); + + if (mount(mnt->mnt_fsname, mnt->mnt_dir, mnt->mnt_type, + (unsigned long)(MS_MGC_VAL | MS_REMOUNT), buf)) + die ("resize_reiserfs: remount failed: %s\n", strerror(errno)); + + endmntent(f); + return 0; + } +fail: + die ("resize_reiserfs: can\t find mount entry\n"); + return 1; +} + diff -u -r --new-file linux/fs/reiserfs/utils/resize_reiserfs/resize_reiserfs.c v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/resize_reiserfs.c --- linux/fs/reiserfs/utils/resize_reiserfs/resize_reiserfs.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/utils/resize_reiserfs/resize_reiserfs.c Thu Aug 10 19:09:06 2000 @@ -0,0 +1,296 @@ +/* Copyright 1999 Hans Reiser, see README file for licensing details. + * + * Written by Alexander Zarochentcev. 
+ * + * FS resize utility + * + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <asm/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/vfs.h> +#include <time.h> +#include <sys/ioctl.h> +#include <sys/mount.h> + + +#include "inode.h" +#include "io.h" +#include "sb.h" +#include "misc.h" +#include "reiserfs_fs.h" +#include "resize.h" + +#define print_usage_and_exit()\ + die ("Usage: %s -s[+|-]#[M|K] [-fvn] device", argv[0]) + +struct buffer_head * g_sb_bh; + /* use of a long is a 2.2 Linux VFS + limitation, review this decision for + 2.3 and/or LFS patch. -Hans */ +unsigned long g_block_count_new; +int g_bmap_nr_new; + +int force = 0; +int verbose = 0; +int nowrite = 0; + +/* Given a file descriptor and an offset, check whether the offset is + a valid offset for the file - return 0 if it isn't valid or 1 if it + is */ +int valid_offset( int fd, loff_t offset ) +{ + char ch; + + if (reiserfs_llseek (fd, offset, 0) < 0) + return 0; + + if (read (fd, &ch, 1) < 1) + return 0; + + return 1; +} + + /* A bunch of these functions look like + they could be shared with those in + super.c or the utils, can they? + If so, then do so. 
-Hans */ +static void read_superblock(int dev) { + int bs; + struct reiserfs_super_block * sb; + + g_sb_bh = bread(dev, (REISERFS_DISK_OFFSET_IN_BYTES / 1024), 1024); + if (!g_sb_bh) + die ("resize_reiserfs: can\'t read superblock\n"); + sb = (struct reiserfs_super_block *)g_sb_bh->b_data; + + if(strncmp(sb->s_magic, REISERFS_SUPER_MAGIC_STRING, sizeof(REISERFS_SUPER_MAGIC_STRING) - 1) ) + die ("resize_reiserfs: device doesn\'t contain valid reiserfs\n"); + + bs = sb->s_blocksize; + brelse(g_sb_bh); + + g_sb_bh = bread(dev, REISERFS_DISK_OFFSET_IN_BYTES / bs, bs); + if (!g_sb_bh) + die ("resize_reiserfs: can\'t read superblock\n"); + if (g_sb_bh->b_blocknr >= le32_to_cpu(sb->s_journal_block)) + die ("resize_reiserfs: can\'t read superblock\n"); +} + +/* calculate the new fs size (in blocks) from old fs size and the string + representation of new size */ +static unsigned long calc_new_fs_size(unsigned long count, + int bs, char *bytes_str) { + long long int bytes; + unsigned long blocks; + int c; + + bytes = atoll(bytes_str); + c = bytes_str[strlen(bytes_str) - 1]; + + switch (c) { + case 'M': + case 'm': + bytes *= 1024; + case 'K': + case 'k': + bytes *= 1024; + } + + blocks = bytes / bs; + + if (bytes_str[0] == '+' || bytes_str[0] == '-') + return (count + blocks); + + return blocks; +} + +/* print some fs parameters */ +static void sb_report(struct reiserfs_super_block * sb1, + struct reiserfs_super_block * sb2){ + printf( + "ReiserFS report:\n" + "blocksize %d\n" + "block count %d (%d)\n" + "free blocks %d (%d)\n" + "bitmap block count %d (%d)\n", + sb1->s_blocksize, + sb1->s_block_count, sb2->s_block_count, + sb1->s_free_blocks, sb2->s_free_blocks, + sb1->s_bmap_nr, sb2->s_bmap_nr); +}; + +/* read i-th bitmap block */ +static struct buffer_head * get_bm_blk (int dev, int ind, int bs) { + if (ind == 0) + return bread(g_sb_bh->b_dev, REISERFS_DISK_OFFSET_IN_BYTES / bs + 1 ,bs); + return bread(dev, ind * bs * 8, bs); +} + +/* conditional bwrite */ +static int 
bwrite_cond (struct buffer_head * bh) { + if(!nowrite) { + mark_buffer_uptodate(bh,0); + mark_buffer_dirty(bh,0); + return bwrite(bh); + } + return 0; +} + + +/* the first one of the mainest functions */ +int expand_fs(void) { + struct reiserfs_super_block * sb; + struct buffer_head * bm_bh; + int block_r, block_r_new; + int i; + + sb = (struct reiserfs_super_block *) g_sb_bh->b_data; + + /* count used bits in last bitmap block */ + block_r = sb->s_block_count - + ((sb->s_bmap_nr - 1) * sb->s_blocksize * 8); + + /* count bitmap blocks in new fs */ + g_bmap_nr_new = g_block_count_new / (sb->s_blocksize * 8); + block_r_new = g_block_count_new - + g_bmap_nr_new * sb->s_blocksize * 8; + if(block_r_new) + g_bmap_nr_new++; + else + block_r_new = sb->s_blocksize * 8; + + /* clear bits in last bitmap block (old layout) */ + bm_bh = get_bm_blk(g_sb_bh->b_dev, sb->s_bmap_nr - 1, sb->s_blocksize); + for (i = block_r; i < sb->s_blocksize * 8; i++) + clear_bit(i, bm_bh->b_data); + bwrite_cond(bm_bh); + + /* add new bitmap blocks */ + for (i = sb->s_bmap_nr; i < g_bmap_nr_new; i++) { + memset(bm_bh->b_data, 0, bm_bh->b_size); + set_bit(0, bm_bh->b_data); + bm_bh->b_blocknr = /* It is not a first BM block */ + i * sb->s_blocksize * 8; /* with special location */ + bwrite_cond(bm_bh); + } + + /* set unused bits in last bitmap block (new layout) */ + for (i = block_r_new; i < sb->s_blocksize * 8; i++) + set_bit(i, bm_bh->b_data); + bwrite_cond(bm_bh); + + /* update super block buffer*/ + sb->s_free_blocks += g_block_count_new - sb->s_block_count + - (g_bmap_nr_new - sb->s_bmap_nr); + sb->s_block_count = g_block_count_new; + sb->s_bmap_nr = g_bmap_nr_new; + + /* commit changes */ + bwrite_cond(g_sb_bh); + + brelse(g_sb_bh); + brelse(bm_bh); + + return 0; +} + +int main(int argc, char *argv[]) { + char * bytes_count_str = NULL; + char * devname; + struct stat statbuf; + int c; + + int dev; + struct reiserfs_super_block *sb, *sb_old; + + while ((c = getopt(argc, argv, "fvns:")) != 
EOF) { + switch (c) { + case 's' : + if (!optarg) + die("%s: Missing argument to -s option", argv[0]); + bytes_count_str = optarg; + break; + case 'f': + force = 1; + break; + case 'v': + verbose = 1; + break; + case 'n': + nowrite = 1; + break; + default: + print_usage_and_exit (); + } + } + + if (optind == argc || (!bytes_count_str)) + print_usage_and_exit(); + devname = argv[optind]; + + /* open_device will die if it could not open device */ + dev = open (devname, O_RDWR); + if (dev == -1) + die ("%s: can not open '%s': %s", argv[0], devname, strerror (errno)); + + if (fstat (dev, &statbuf) < 0) + die ("%s: unable to stat %s", argv[0], devname); + + if (!S_ISBLK (statbuf.st_mode) && force ) + die ("%s: '%s (%o)' is not a block device", + argv[0], devname, statbuf.st_mode); + + read_superblock(dev); + + sb = (struct reiserfs_super_block *) g_sb_bh->b_data; + g_block_count_new = calc_new_fs_size(sb->s_block_count, + sb->s_blocksize, bytes_count_str); + if (is_mounted (devname)) { + close(dev); + if (!force) + die ("%s: '%s' contains a mounted file system,\n" + "\tspecify -f option to resize the fs online\n", + argv[0], devname); + resize_fs_online(devname, g_block_count_new); + return 0; + } + + if (sb->s_state != REISERFS_VALID_FS) + die ("%s: the file system isn't in valid state\n"); + + if(!valid_offset(dev, (loff_t) g_block_count_new * sb->s_blocksize - 1)) + die ("%s: %s too small", argv[0], devname); + + sb_old = 0; /* Needed to keep idiot compiler from issuing false warning */ + /* save SB for reporting */ + if(verbose) { + sb_old = getmem(sizeof(struct reiserfs_super_block)); + memcpy(sb_old, sb, sizeof(struct reiserfs_super_block)); + } + + if (g_block_count_new == sb->s_block_count) + die ("%s: Calculated fs size is the same as the previous one.", + argv[0]); + if (g_block_count_new > sb->s_block_count) + expand_fs(); + else + die ("%s: FS shrinking is not implemented yet", argv[0]); + + if(verbose) { + sb_report(sb, sb_old); + freemem(sb_old); + } + + 
 check_and_free_mem (); + close(dev); + + return 0; +} diff -u -r --new-file linux/fs/reiserfs/version.c v2.4.0-test8/linux/fs/reiserfs/version.c --- linux/fs/reiserfs/version.c Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/fs/reiserfs/version.c Thu Sep 21 12:28:56 2000 @@ -0,0 +1,7 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +char *reiserfs_get_version_string(void) { + return "ReiserFS version 3.6.17" ; +} diff -u -r --new-file linux/include/asm-i386/errno.h v2.4.0-test8/linux/include/asm-i386/errno.h --- linux/include/asm-i386/errno.h Mon Jan 24 18:57:20 2000 +++ v2.4.0-test8/linux/include/asm-i386/errno.h Sun May 14 23:15:05 2000 @@ -128,5 +128,6 @@ #define ENOMEDIUM 123 /* No medium found */ #define EMEDIUMTYPE 124 /* Wrong medium type */ +#define EHASHCOLLISION 125 /* Number of hash collisions exceeds maximum generation counter value. */ #endif diff -u -r --new-file linux/include/linux/fs.h v2.4.0-test8/linux/include/linux/fs.h --- linux/include/linux/fs.h Mon Sep 11 15:22:47 2000 +++ v2.4.0-test8/linux/include/linux/fs.h Mon Sep 11 05:21:52 2000 @@ -282,6 +282,7 @@ #include <linux/hfs_fs_i.h> #include <linux/adfs_fs_i.h> #include <linux/qnx4_fs_i.h> +#include <linux/reiserfs_fs_i.h> #include <linux/bfs_fs_i.h> #include <linux/udf_fs_i.h> #include <linux/ncp_fs_i.h> @@ -436,6 +437,7 @@ struct hfs_inode_info hfs_i; struct adfs_inode_info adfs_i; struct qnx4_inode_info qnx4_i; + struct reiserfs_inode_info reiserfs_i; struct bfs_inode_info bfs_i; struct udf_inode_info udf_i; struct ncp_inode_info ncpfs_i; @@ -615,6 +617,7 @@ #include <linux/hfs_fs_sb.h> #include <linux/adfs_fs_sb.h> #include <linux/qnx4_fs_sb.h> +#include <linux/reiserfs_fs_sb.h> #include <linux/bfs_fs_sb.h> #include <linux/udf_fs_sb.h> #include <linux/ncp_fs_sb.h> @@ -662,6 +665,7 @@ struct hfs_sb_info hfs_sb; struct adfs_sb_info adfs_sb; struct qnx4_sb_info qnx4_sb; + struct reiserfs_sb_info reiserfs_sb; struct bfs_sb_info 
bfs_sb; struct udf_sb_info udf_sb; struct ncp_sb_info ncpfs_sb; @@ -775,6 +779,15 @@ */ struct super_operations { void (*read_inode) (struct inode *); + + /* reiserfs kludge. reiserfs needs 64 bits of information to + ** find an inode. We are using the read_inode2 call to get + ** that information. We don't like this, and are waiting on some + ** VFS changes for the real solution. + ** iget4 calls read_inode2, iff it is defined + */ + void (*read_inode2) (struct inode *, void *) ; + void (*dirty_inode) (struct inode *); void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*delete_inode) (struct inode *); diff -u -r --new-file linux/include/linux/reiserfs_fs.h v2.4.0-test8/linux/include/linux/reiserfs_fs.h --- linux/include/linux/reiserfs_fs.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/include/linux/reiserfs_fs.h Thu Sep 14 13:07:11 2000 @@ -0,0 +1,2003 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ +#ifndef _LINUX_REISER_FS_H +#define _LINUX_REISER_FS_H + + +#include <linux/types.h> +#ifdef __KERNEL__ +#include <linux/malloc.h> +#include <linux/tqueue.h> +#endif + +/* + * include/linux/reiser_fs.h + * + * Reiser File System constants and structures + * + */ + +/* in reading the #defines, it may help to understand that they employ + the following abbreviations: + + B = Buffer + I = Item header + H = Height within the tree (should be changed to LEV) + N = Number of the item in the node + STAT = stat data + DEH = Directory Entry Header + EC = Entry Count + E = Entry number + UL = Unsigned Long + BLKH = BLocK Header + UNFM = UNForMatted node + DC = Disk Child + P = Path + + These #defines are named by concatenating these abbreviations, + where first comes the arguments, and last comes the return value, + of the macro. 
+ +*/ + +/* NEW_GET_NEW_BUFFER will try to allocate new blocks better */ +/*#define NEW_GET_NEW_BUFFER*/ +#define OLD_GET_NEW_BUFFER + +/* if this is undefined, all inode changes get into stat data immediately, if it can be found in RAM */ +#define DIRTY_LATER + +/* enable journalling */ +#define ENABLE_JOURNAL + +#ifdef __KERNEL__ + +#define REISERFS_CHECK + +#define REISERFS_PREALLOCATE +#endif + +/* if this is undefined, all inode changes get into stat data + immediately, if it can be found in RAM */ +#define DIRTY_LATER + + +/*#define READ_LOCK_REISERFS*/ + + +/* n must be power of 2 */ +#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) + +// to be ok for alpha and others we have to align structures to 8 byte +// boundary. +// FIXME: do not change 4 by anything else: there is code which relies on that +#define ROUND_UP(x) _ROUND_UP(x,8LL) + +/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug +** messages. +*/ +#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ + +/* + * Disk Data Structures + */ + +/***************************************************************************/ +/* SUPER BLOCK */ +/***************************************************************************/ + +/* + * Structure of super block on disk, a version of which in RAM is often accessed as s->u.reiserfs_sb.s_rs + * the version in RAM is part of a larger structure containing fields never written to disk. + */ + + /* used by gcc */ +#define REISERFS_SUPER_MAGIC 0x52654973 + /* used by file system utilities that + look at the superblock, etc. 
*/ +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + +extern inline int is_reiserfs_magic_string (struct reiserfs_super_block * rs) +{ + return (!strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, + strlen ( REISERFS_SUPER_MAGIC_STRING)) || + !strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))); +} + + /* ReiserFS leaves the first 64k unused, + so that partition labels have enough + space. If someone wants to write a + fancy bootloader that needs more than + 64k, let us know, and this will be + increased in size. This number must + be larger than than the largest block + size on any platform, or code will + break. -Hans */ +#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) +#define REISERFS_FIRST_BLOCK unused_define + +/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) + + +// reiserfs internal error code (used by search_by_key adn fix_nodes)) +#define CARRY_ON 0 +#define REPEAT_SEARCH -1 +#define IO_ERROR -2 +#define NO_DISK_SPACE -3 +#define NO_BALANCING_NEEDED (-4) +#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) + +//#define SCHEDULE_OCCURRED 1 +//#define PATH_INCORRECT 2 + +//#define NO_DISK_SPACE (-1) + + + +typedef unsigned long b_blocknr_t; +typedef __u32 unp_t; + +struct unfm_nodeinfo { + unp_t unfm_nodenum; + unsigned short unfm_freespace; +}; + + +/* when reiserfs_file_write is called with a byte count >= MIN_PACK_ON_CLOSE, +** it sets the inode to pack on close, and when extending the file, will only +** use unformatted nodes. +** +** This is a big speed up for the journal, which is badly hurt by direct->indirect +** conversions (they must be logged). 
+*/ +#define MIN_PACK_ON_CLOSE 512 + +/* the defines below say, that if file size is >= + DIRECT_TAIL_SUPPRESSION_SIZE * blocksize, then if tail is longer + than MAX_BYTES_SUPPRESS_DIRECT_TAIL, it will be stored in + unformatted node */ +#define DIRECT_TAIL_SUPPRESSION_SIZE 1024 +#define MAX_BYTES_SUPPRESS_DIRECT_TAIL 1024 + +#if 0 + +// +#define mark_file_with_tail(inode,offset) \ +{\ +inode->u.reiserfs_i.i_has_tail = 1;\ +} + +#define mark_file_without_tail(inode) \ +{\ +inode->u.reiserfs_i.i_has_tail = 0;\ +} + +#endif + +// this says about version of all items (but stat data) the object +// consists of +#define inode_items_version(inode) ((inode)->u.reiserfs_i.i_version) + + +/* We store tail in unformatted node if it is too big to fit into a + formatted node or if DIRECT_TAIL_SUPPRESSION_SIZE, + MAX_BYTES_SUPPRESS_DIRECT_TAIL and file size say that. */ +/* #define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ */ +/* ( ((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ */ +/* ( ( (n_file_size) >= (n_block_size) * DIRECT_TAIL_SUPPRESSION_SIZE ) && \ */ +/* ( (n_tail_size) >= MAX_BYTES_SUPPRESS_DIRECT_TAIL ) ) ) */ + + /* This is an aggressive tail suppression policy, I am hoping it + improves our benchmarks. The principle behind it is that + percentage space saving is what matters, not absolute space + saving. This is non-intuitive, but it helps to understand it if + you consider that the cost to access 4 blocks is not much more + than the cost to access 1 block, if you have to do a seek and + rotate. A tail risks a non-linear disk access that is + significant as a percentage of total time cost for a 4 block file + and saves an amount of space that is less significant as a + percentage of space, or so goes the hypothesis. 
-Hans */ +#define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ + ( (n_file_size) >= (n_block_size) * 4 ) || \ + ( ( (n_file_size) >= (n_block_size) * 3 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ + ( ( (n_file_size) >= (n_block_size) * 2 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ + ( ( (n_file_size) >= (n_block_size) ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ +) + + +/* + * values for s_state field + */ +#define REISERFS_VALID_FS 1 +#define REISERFS_ERROR_FS 2 + + + +/***************************************************************************/ +/* KEY & ITEM HEAD */ +/***************************************************************************/ + +// +// we do support for old format of reiserfs: the problem is to +// distinuquish keys with 32 bit offset and keys with 60 bit ones. On +// leaf level we use ih_version of struct item_head (was +// ih_reserved). For all old items it is set to 0 +// (ITEM_VERSION_1). For new items it is ITEM_VERSION_2. On internal +// levels we have to know version of item key belongs to. 
+// +#define ITEM_VERSION_1 0 +#define ITEM_VERSION_2 1 + + +/* loff_t - long long */ + + +// +// directories use this key as well as old files +// +struct offset_v1 { + __u32 k_offset; + __u32 k_uniqueness; +}; + +struct offset_v2 { + __u64 k_offset:60; + __u64 k_type: 4; +}; + + + +/* Key of an item determines its location in the S+tree, and + is composed of 4 components */ +struct key { + __u32 k_dir_id; /* packing locality: by default parent + directory object id */ + __u32 k_objectid; /* object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } u; +}; + + +struct cpu_key { + struct key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect and + indirect2direct conversion */ +}; + + + + + + + + /* Our function for comparing keys can compare keys of different + lengths. It takes as a parameter the length of the keys it is to + compare. These defines are used in determining what is to be + passed to it as that parameter. */ +#define REISERFS_FULL_KEY_LEN 4 + +#define REISERFS_SHORT_KEY_LEN 2 + +/* The result of the key compare */ +#define FIRST_GREATER 1 +#define SECOND_GREATER -1 +#define KEYS_IDENTICAL 0 +#define KEY_FOUND 1 +#define KEY_NOT_FOUND 0 + + +#define KEY_SIZE (sizeof(struct key)) +#define SHORT_KEY_SIZE (sizeof (unsigned long) + sizeof (unsigned long)) + +/* return values for search_by_key and clones */ +#define ITEM_FOUND 1 +#define ITEM_NOT_FOUND 0 +#define ENTRY_FOUND 1 +#define ENTRY_NOT_FOUND 0 +#define DIRECTORY_NOT_FOUND -1 +#define REGULAR_FILE_FOUND -2 +#define DIRECTORY_FOUND -3 +#define BYTE_FOUND 1 +#define BYTE_NOT_FOUND 0 +#define FILE_NOT_FOUND -1 + +#define POSITION_FOUND 1 +#define POSITION_NOT_FOUND 0 + +// return values for reiserfs_find_entry and search_by_entry_key +#define NAME_FOUND 1 +#define NAME_NOT_FOUND 0 +#define GOTO_PREVIOUS_ITEM 2 +#define NAME_FOUND_INVISIBLE 3 + + + +/* Everything in the filesystem is stored as a set of items. 
The + item head contains the key of the item, its free space (for + indirect items) and specifies the location of the item itself + within the block. */ + +struct item_head +{ + struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/ + + union { + __u16 ih_free_space_reserved; /* The free space in the last unformatted node of an indirect item if this + is an indirect item. This equals 0xFFFF iff this is a direct item or + stat data item. Note that the key, not this field, is used to determine + the item type, and thus which field this union contains. */ + __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory + entries in the directory item. */ + } u; + __u16 ih_item_len; /* total size of the item body */ + __u16 ih_item_location; /* an offset to the item body within the block */ + __u16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all done */ +}; +/* size of item header */ +#define IH_SIZE (sizeof(struct item_head)) + +#define ih_free_space(ih) (le16_to_cpu ((ih)->u.ih_free_space_reserved)) +#define ih_version(ih) (le16_to_cpu ((ih)->ih_version)) +#define ih_entry_count(ih) (le16_to_cpu ((ih)->u.ih_entry_count)) +#define ih_location(ih) (le16_to_cpu ((ih)->ih_item_location)) +#define ih_item_len(ih) (le16_to_cpu ((ih)->ih_item_len)) + + +// FIXME: now would that work for other than i386 archs +#define unreachable_item(ih) (ih->ih_version & (1 << 15)) + + +#define get_ih_free_space(ih) (ih_version (ih) == ITEM_VERSION_2 ? 0 : ih_free_space (ih)) +#define set_ih_free_space(ih,val) (ih_free_space (ih) = (ih_version (ih) == ITEM_VERSION_2 ? 
0 : val)) + + +// +// there are 5 item types currently +// +#define TYPE_STAT_DATA 0 +#define TYPE_INDIRECT 1 +#define TYPE_DIRECT 2 +#define TYPE_DIRENTRY 3 +#define TYPE_ANY 15 // FIXME: comment is required + +// +// in old version uniqueness field shows key type +// +#define V1_SD_UNIQUENESS 0 +#define V1_INDIRECT_UNIQUENESS 0xfffffffe +#define V1_DIRECT_UNIQUENESS 0xffffffff +#define V1_DIRENTRY_UNIQUENESS 500 +#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required + +// +// here are conversion routines +// +extern inline int uniqueness2type (__u32 uniqueness) +{ + switch (uniqueness) { + case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; + case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; + case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; + case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; + } +/* + if (uniqueness != V1_ANY_UNIQUENESS) { + printk ("uniqueness %d\n", uniqueness); + BUG (); + } +*/ + return TYPE_ANY; +} + +extern inline __u32 type2uniqueness (int type) +{ + switch (type) { + case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; + case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; + case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; + case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; + } + /* + if (type != TYPE_ANY) + BUG (); + */ + return V1_ANY_UNIQUENESS; +} + + +// +// key is pointer to on disk key which is stored in le, result is cpu, +// there is no way to get version of object from key, so, provide +// version to these defines +// +extern inline loff_t le_key_k_offset (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? key->u.k_offset_v1.k_offset : + le64_to_cpu (key->u.k_offset_v2.k_offset); +} +extern inline loff_t le_ih_k_offset (struct item_head * ih) +{ + return le_key_k_offset (ih_version (ih), &(ih->ih_key)); +} + + +extern inline loff_t le_key_k_type (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? 
uniqueness2type (key->u.k_offset_v1.k_uniqueness) : + le16_to_cpu (key->u.k_offset_v2.k_type); +} +extern inline loff_t le_ih_k_type (struct item_head * ih) +{ + return le_key_k_type (ih_version (ih), &(ih->ih_key)); +} + + +extern inline void set_le_key_k_offset (int version, struct key * key, loff_t offset) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_offset = offset) : + (key->u.k_offset_v2.k_offset = cpu_to_le64 (offset)); +} +extern inline void set_le_ih_k_offset (struct item_head * ih, loff_t offset) +{ + set_le_key_k_offset (ih_version (ih), &(ih->ih_key), offset); +} + + + +extern inline void set_le_key_k_type (int version, struct key * key, int type) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_uniqueness = type2uniqueness (type)) : + (key->u.k_offset_v2.k_type = cpu_to_le16 (type)); +} +extern inline void set_le_ih_k_type (struct item_head * ih, int type) +{ + set_le_key_k_type (ih_version (ih), &(ih->ih_key), type); +} + + +#define is_direntry_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRENTRY) +#define is_direct_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRECT) +#define is_indirect_le_key(version,key) (le_key_k_type (version, key) == TYPE_INDIRECT) +#define is_statdata_le_key(version,key) (le_key_k_type (version, key) == TYPE_STAT_DATA) + +// +// item header has version. +// +#define is_direntry_le_ih(ih) is_direntry_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_direct_le_ih(ih) is_direct_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_indirect_le_ih(ih) is_indirect_le_key (ih_version(ih), &((ih)->ih_key)) +#define is_statdata_le_ih(ih) is_statdata_le_key (ih_version (ih), &((ih)->ih_key)) + + + +// +// key is pointer to cpu key, result is cpu +// +extern inline loff_t cpu_key_k_offset (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? 
key->on_disk_key.u.k_offset_v1.k_offset : + key->on_disk_key.u.k_offset_v2.k_offset; +} + +extern inline loff_t cpu_key_k_type (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? uniqueness2type (key->on_disk_key.u.k_offset_v1.k_uniqueness) : + key->on_disk_key.u.k_offset_v2.k_type; +} + +extern inline void set_cpu_key_k_offset (struct cpu_key * key, loff_t offset) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_offset = offset) : + (key->on_disk_key.u.k_offset_v2.k_offset = offset); +} + + +extern inline void set_cpu_key_k_type (struct cpu_key * key, int type) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_uniqueness = type2uniqueness (type)) : + (key->on_disk_key.u.k_offset_v2.k_type = type); +} + +extern inline void cpu_key_k_offset_dec (struct cpu_key * key) +{ + if (key->version == ITEM_VERSION_1) + key->on_disk_key.u.k_offset_v1.k_offset --; + else + key->on_disk_key.u.k_offset_v2.k_offset --; +} + + +#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) +#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) +#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) +#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) + + +/* are these used ? */ +#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) +#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) +#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) +#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) + + + + + +#define I_K_KEY_IN_ITEM(p_s_ih, p_s_key, n_blocksize) \ + ( ! 
COMP_SHORT_KEYS(p_s_ih, p_s_key) && \ + I_OFF_BYTE_IN_ITEM(p_s_ih, k_offset (p_s_key), n_blocksize) ) + +/* maximal length of item */ +#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +#define MIN_ITEM_LEN 1 + + +/* object identifier for root dir */ +#define REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 +extern struct key root_key; + + + + +/* + * Picture represents a leaf of the S+tree + * ______________________________________________________ + * | | Array of | | | + * |Block | Object-Item | F r e e | Objects- | + * | head | Headers | S p a c e | Items | + * |______|_______________|___________________|___________| + */ + +/* Header of a disk block. More precisely, header of a formatted leaf + or internal node, and not the header of an unformatted node. */ +struct block_head { + __u16 blk_level; /* Level of a block in the tree. */ + __u16 blk_nr_item; /* Number of keys/items in a block. */ + __u16 blk_free_space; /* Block free space in bytes. */ + __u16 blk_reserved; + struct key blk_right_delim_key; /* kept only for compatibility */ +}; + +#define BLKH_SIZE (sizeof(struct block_head)) + +/* + * values for blk_level field of the struct block_head + */ + +#define FREE_LEVEL 0 /* when node get removed off the tree - its + blk_level is set to FREE_LEVEL. It is them + used to see whether the node is still in the + tree */ + +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level.*/ + +/* Given the buffer head of a formatted node, resolve to the block head of that node. */ +#define B_BLK_HEAD(p_s_bh) ((struct block_head *)((p_s_bh)->b_data)) +/* Number of items that are in buffer. */ +#define B_NR_ITEMS(p_s_bh) (le16_to_cpu ( B_BLK_HEAD(p_s_bh)->blk_nr_item )) +#define B_LEVEL(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_level )) +#define B_FREE_SPACE(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_free_space )) +/* Get right delimiting key. 
*/ +#define B_PRIGHT_DELIM_KEY(p_s_bh) ( &(B_BLK_HEAD(p_s_bh)->blk_right_delim_key) ) + +/* Does the buffer contain a disk leaf. */ +#define B_IS_ITEMS_LEVEL(p_s_bh) ( B_BLK_HEAD(p_s_bh)->blk_level == DISK_LEAF_NODE_LEVEL ) + +/* Does the buffer contain a disk internal node */ +#define B_IS_KEYS_LEVEL(p_s_bh) ( B_BLK_HEAD(p_s_bh)->blk_level > DISK_LEAF_NODE_LEVEL &&\ + B_BLK_HEAD(p_s_bh)->blk_level <= MAX_HEIGHT ) + + + + +/***************************************************************************/ +/* STAT DATA */ +/***************************************************************************/ + + +// +// old stat data is 32 bytes long. We are going to distinguish new one by +// different size +// +struct stat_data_v1 +{ + __u16 sd_mode; /* file type, permissions */ + __u16 sd_nlink; /* number of hard links */ + __u16 sd_uid; /* owner */ + __u16 sd_gid; /* group */ + __u32 sd_size; /* file size */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __u32 sd_rdev; + __u32 sd_blocks; /* number of blocks file uses */ + } u; + __u32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. 
-Hans */ +}; + +#define SD_V1_SIZE (sizeof(struct stat_data_v1)) + + +/* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ +struct stat_data { + __u16 sd_mode; /* file type, permissions */ + __u16 sd_reserved; + __u32 sd_nlink; /* number of hard links */ + __u64 sd_size; /* file size */ + __u32 sd_uid; /* owner */ + __u32 sd_gid; /* group */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __u32 sd_blocks; + union { + __u32 sd_rdev; + //__u32 sd_first_direct_byte; + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? */ + } u; +}; +// +// this is 40 bytes long +// +#define SD_SIZE (sizeof(struct stat_data)) + +#define stat_data_v1(ih) (ih_version (ih) == ITEM_VERSION_1) + + +/***************************************************************************/ +/* DIRECTORY STRUCTURE */ +/***************************************************************************/ +/* + Picture represents the structure of directory items + ________________________________________________ + | Array of | | | | | | + | directory |N-1| N-2 | .... | 1st |0th| + | entry headers | | | | | | + |_______________|___|_____|________|_______|___| + <---- directory entries ------> + + First directory item has k_offset component 1. We store "." and ".." + in one item, always, we never split "." and ".." into differing + items. This makes, among other things, the code for removing + directories simpler. 
*/ +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +/* */ +#define FIRST_ITEM_OFFSET 1 + +/* + Q: How to get key of object pointed to by entry from entry? + + A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key + of object, entry points to */ + +/* NOT IMPLEMENTED: + Directory will someday contain stat data of object */ + + + +struct reiserfs_de_head +{ + __u32 deh_offset; /* third component of the directory entry key */ + __u32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced + by directory entry */ + __u32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ + __u16 deh_location; /* offset of name in the whole item */ + __u16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether + entry is hidden (unlinked) */ +}; +#define DEH_SIZE sizeof(struct reiserfs_de_head) + +/* empty directory contains two entries "." and ".." and their headers */ +#define EMPTY_DIR_SIZE \ +(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) + +/* old format directories have this size when empty */ +#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) + +#define DEH_Statdata 0 /* not used now */ +#define DEH_Visible 2 + + +/* compose directory item containing "." and ".." entries (entries are + not aligned to 4 byte boundary) */ +extern inline void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE_V1); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." 
*/ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE_V1 - strlen (".")); + deh[0].deh_state = 0; + set_bit (DEH_Visible, &(deh[0].deh_state)); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - strlen ("..")); + deh[1].deh_state = 0; + set_bit (DEH_Visible, &(deh[1].deh_state)); + + /* copy ".." and "." */ + memcpy (body + deh[0].deh_location, ".", 1); + memcpy (body + deh[1].deh_location, "..", 2); +} + + +/* compose directory item containing "." and ".." entries */ +extern inline void make_empty_dir_item (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE - ROUND_UP (strlen ("."))); + deh[0].deh_state = 0; + set_bit (DEH_Visible, &(deh[0].deh_state)); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - ROUND_UP (strlen (".."))); + deh[1].deh_state = 0; + set_bit (DEH_Visible, &(deh[1].deh_state)); + + /* copy ".." and "." 
*/ + memcpy (body + deh[0].deh_location, ".", 1); + memcpy (body + deh[1].deh_location, "..", 2); +} + + +#define deh_dir_id(deh) (__le32_to_cpu ((deh)->deh_dir_id)) +#define deh_objectid(deh) (__le32_to_cpu ((deh)->deh_objectid)) +#define deh_offset(deh) (__le32_to_cpu ((deh)->deh_offset)) + + +#define mark_de_with_sd(deh) set_bit (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_without_sd(deh) clear_bit (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_visible(deh) set_bit (DEH_Visible, &((deh)->deh_state)) +#define mark_de_hidden(deh) clear_bit (DEH_Visible, &((deh)->deh_state)) + +#define de_with_sd(deh) test_bit (DEH_Statdata, &((deh)->deh_state)) +#define de_visible(deh) test_bit (DEH_Visible, &((deh)->deh_state)) +#define de_hidden(deh) !test_bit (DEH_Visible, &((deh)->deh_state)) + +/* array of the entry headers */ + /* get item body */ +#define B_I_PITEM(bh,ih) ( (bh)->b_data + (ih)->ih_item_location ) +#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) + +/* length of the directory entry in directory item. This define + calculates length of i-th directory entry using directory entry + locations from dir entry head. When it calculates length of 0-th + directory entry, it uses length of whole item in place of entry + location of the non-existent following entry in the calculation. + See picture above.*/ +/* +#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ +((i) ? (((deh)-1)->deh_location - (deh)->deh_location) : ((ih)->ih_item_len) - (deh)->deh_location) +*/ +extern inline int entry_length (struct buffer_head * bh, struct item_head * ih, + int pos_in_item) +{ + struct reiserfs_de_head * deh; + + deh = B_I_DEH (bh, ih) + pos_in_item; + if (pos_in_item) + return (le16_to_cpu ((deh - 1)->deh_location) - le16_to_cpu (deh->deh_location)); + return (le16_to_cpu (ih->ih_item_len) - le16_to_cpu (deh->deh_location)); +} + + + +/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. 
*/ +#define I_ENTRY_COUNT(ih) ((ih)->u.ih_entry_count) + + +/* name by bh, ih and entry_num */ +#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih->ih_item_location + (B_I_DEH(bh,ih)+(entry_num))->deh_location)) + +// two entries per block (at least) +//#define REISERFS_MAX_NAME_LEN(block_size) +//((block_size - BLKH_SIZE - IH_SIZE - DEH_SIZE * 2) / 2) + +// two entries per block (at least) +#define REISERFS_MAX_NAME_LEN(block_size) \ +((block_size - BLKH_SIZE - IH_SIZE - DEH_SIZE)) + + + + +/* this structure is used for operations on directory entries. It is + not a disk structure. */ +/* When reiserfs_find_entry or search_by_entry_key find directory + entry, they return filled reiserfs_dir_entry structure */ +struct reiserfs_dir_entry +{ + struct buffer_head * de_bh; + int de_item_num; + struct item_head * de_ih; + int de_entry_num; + struct reiserfs_de_head * de_deh; + int de_entrylen; + int de_namelen; + char * de_name; + char * de_gen_number_bit_string; + + __u32 de_dir_id; + __u32 de_objectid; + + struct cpu_key de_entry_key; +}; + +/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ + +/* pointer to file name, stored in entry */ +#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + (deh)->deh_location) + +/* length of name */ +#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ +(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? 
SD_SIZE : 0)) + + + +/* hash value occupies bits from 7 up to 30 */ +#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) +/* generation number occupies 7 bits starting from 0 up to 6 */ +#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) +#define MAX_GENERATION_NUMBER 127 + +#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) + + +/* + * Picture represents an internal node of the reiserfs tree + * ______________________________________________________ + * | | Array of | Array of | Free | + * |block | keys | pointers | space | + * | head | N | N+1 | | + * |______|_______________|___________________|___________| + */ + +/***************************************************************************/ +/* DISK CHILD */ +/***************************************************************************/ +/* Disk child pointer: The pointer from an internal node of the tree + to a node that is on disk. */ +struct disk_child { + __u32 dc_block_number; /* Disk child's block number. */ + __u16 dc_size; /* Disk child's used space. */ + __u16 dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) + +/* Get disk child by buffer header and position in the tree node. */ +#define B_N_CHILD(p_s_bh,n_pos) ((struct disk_child *)\ +((p_s_bh)->b_data+BLKH_SIZE+B_NR_ITEMS(p_s_bh)*KEY_SIZE+DC_SIZE*(n_pos))) + +/* Get disk child number by buffer header and position in the tree node. 
*/ +#define B_N_CHILD_NUM(p_s_bh,n_pos) (le32_to_cpu (B_N_CHILD(p_s_bh,n_pos)->dc_block_number)) + + /* maximal value of field child_size in structure disk_child */ + /* child size is the combined size of all items and their headers */ +#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) + +/* amount of used space in buffer (not including block head) */ +#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) + +/* max and min number of keys in internal node */ +#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) +#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) + +/***************************************************************************/ +/* PATH STRUCTURES AND DEFINES */ +/***************************************************************************/ + + +/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the + key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each node search_by_key finds using + reiserfs_bread it then uses bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking through an internal node. If it + is looking through a leaf node bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given key. */ + +struct path_element { + struct buffer_head * pe_buffer; /* Pointer to the buffer at the path in the tree. */ + int pe_position; /* Position in the tree node which is placed in the */ + /* buffer above. */ +}; + +#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. 
*/ + +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ + + + +/* We need to keep track of who the ancestors of nodes are. When we + perform a search we record which nodes were visited while + descending the tree looking for the node we searched for. This list + of nodes is called the path. This information is used while + performing balancing. Note that this path information may become + invalid, and this means we must check it when using it to see if it + is still valid. You'll need to read search_by_key and the comments + in it, especially about decrement_counters_in_path(), to understand + this structure. */ +struct path { + int path_length; /* Length of the array above. */ + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ + int pos_in_item; +}; + +#define pos_in_item(path) ((path)->pos_in_item) + +#define INITIALIZE_PATH(var) \ +struct path var = {ILLEGAL_PATH_ELEMENT_OFFSET, } + +/* Get path element by path and path position. */ +#define PATH_OFFSET_PELEMENT(p_s_path,n_offset) ((p_s_path)->path_elements +(n_offset)) + +/* Get buffer header at the path by path and path position. */ +#define PATH_OFFSET_PBUFFER(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_buffer) + +/* Get position in the element at the path by path and path position. */ +#define PATH_OFFSET_POSITION(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_position) + + +#define PATH_PLAST_BUFFER(p_s_path) (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) +#define PATH_LAST_POSITION(p_s_path) (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) + + +#define PATH_PITEM_HEAD(p_s_path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path),PATH_LAST_POSITION(p_s_path)) + +/* in do_balance leaf has h == 0 in contrast with path structure, + where root has level == 0. 
That is why we need these defines */ +#define PATH_H_PBUFFER(p_s_path, h) PATH_OFFSET_PBUFFER (p_s_path, p_s_path->path_length - (h)) /* tb->S[h] */ +#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ +#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) +#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ + +#define PATH_H_PATH_OFFSET(p_s_path, n_h) ((p_s_path)->path_length - (n_h)) + +#define get_bh(path) PATH_PLAST_BUFFER(path) +#define get_ih(path) PATH_PITEM_HEAD(path) +#define get_item_pos(path) PATH_LAST_POSITION(path) +#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) +#define item_moved(ih,path) comp_items(ih, path) +#define path_changed(ih,path) comp_items (ih, path) + + +/***************************************************************************/ +/* MISC */ +/***************************************************************************/ + +/* Size of pointer to the unformatted node. */ +#define UNFM_P_SIZE (sizeof(unsigned long)) + +// in in-core inode key is stored on le form +#define INODE_PKEY(inode) ((struct key *)((inode)->u.reiserfs_i.i_key)) +//#define mark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted),1)) +//#define unmark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted), 0)) +//#define is_tail_converted(inode) (atomic_read(&((inode)->u.reiserfs_i.i_converted))) + + + +#define MAX_UL_INT 0xffffffff +#define MAX_INT 0x7fffffff +#define MAX_US_INT 0xffff + +///#define TOO_LONG_LENGTH (~0ULL) + +// reiserfs version 2 has max offset 60 bits.
Version 1 - 32 bit offset +#define U32_MAX (~(__u32)0) +extern inline loff_t max_reiserfs_offset (struct inode * inode) +{ + if (inode_items_version (inode) == ITEM_VERSION_1) + return (loff_t)U32_MAX; + + return (loff_t)((~(__u64)0) >> 4); +} + + +/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ +#define MAX_KEY_OBJECTID MAX_UL_INT + + +#define MAX_B_NUM MAX_UL_INT +#define MAX_FC_NUM MAX_US_INT + + +/* the purpose is to detect overflow of an unsigned short */ +#define REISERFS_LINK_MAX (MAX_US_INT - 1000) + + +/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ +#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ +#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ + +#define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) +#define get_generation(s) atomic_read (&fs_generation(s)) +#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) +#define fs_changed(gen,s) (gen != get_generation (s)) + + +/***************************************************************************/ +/* FIXATE NODES */ +/***************************************************************************/ + +//#define VI_TYPE_STAT_DATA 1 +//#define VI_TYPE_DIRECT 2 +//#define VI_TYPE_INDIRECT 4 +//#define VI_TYPE_DIRECTORY 8 +//#define VI_TYPE_FIRST_DIRECTORY_ITEM 16 +//#define VI_TYPE_INSERTED_DIRECTORY_ITEM 32 + +#define VI_TYPE_LEFT_MERGEABLE 1 +#define VI_TYPE_RIGHT_MERGEABLE 2 + +/* To make any changes in the tree we always first find node, that + contains item to be changed/deleted or place to insert a new + item. We call this node S. To do balancing we need to decide what + we will shift to left/right neighbor, or to a new node, where new + item will be etc. To make this analysis simpler we build virtual + node. Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). 
Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. */ +struct virtual_item +{ + int vi_index; // index in the array of item operations + unsigned short vi_type; // left/right mergeability + unsigned short vi_item_len; /* length of item that it will have after balancing */ + struct item_head * vi_ih; + const char * vi_item; // body of item (old or new) + const void * vi_new_data; // 0 always but paste mode + void * vi_uarea; // item specific area +}; + + +struct virtual_node +{ + char * vn_free_ptr; /* this is a pointer to the free space in the buffer */ + unsigned short vn_nr_item; /* number of items in virtual node */ + short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ + short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head * vn_ins_ih; /* item header of inserted item, 0 for other modes */ + const void * vn_data; + struct virtual_item * vn_vi; /* array of items (including a new one, excluding item to be deleted) */ +}; + + +/***************************************************************************/ +/* TREE BALANCE */ +/***************************************************************************/ + +/* This temporary structure is used in tree balance algorithms, and + constructed as we go to the extent that its various parts are + needed. It contains arrays of nodes that can potentially be + involved in the balancing of node S, and parameters that define how + each of the nodes must be balanced. 
Note that in these algorithms + for balancing the worst case is to need to balance the current node + S and the left and right neighbors and all of their parents plus + create a new node. We implement S1 balancing for the leaf nodes + and S0 balancing for the internal nodes (S1 and S0 are defined in + our papers.)*/ + +#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ + +/* maximum number of FEB blocknrs on a single level */ +#define MAX_AMOUNT_NEEDED 2 + +/* someday somebody will prefix every field in this struct with tb_ */ +struct tree_balance +{ + int tb_mode; + int need_balance_dirty; + struct super_block * tb_sb; + struct reiserfs_transaction_handle *transaction_handle ; + struct path * tb_path; + struct buffer_head * L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ + struct buffer_head * R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path*/ + struct buffer_head * FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ + struct buffer_head * FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ + struct buffer_head * CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ + struct buffer_head * CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ + + struct buffer_head * FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals + cur_blknum. */ + struct buffer_head * used[MAX_FEB_SIZE]; + struct buffer_head * thrown[MAX_FEB_SIZE]; + int lnum[MAX_HEIGHT]; /* array of number of items which must be + shifted to the left in order to balance the + current node; for leaves includes item that + will be partially shifted; for internal + nodes, it is the number of child pointers + rather than items. It includes the new item + being created. The code sometimes subtracts + one to get the number of wholly shifted + items for other purposes. 
*/ + int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ + int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and + S[h] to its item number within the node CFL[h] */ + int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ + int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from + S[h]. A negative value means removing. */ + int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after + balancing on the level h of the tree. If 0 then S is + being deleted, if 1 then S is remaining and no new nodes + are being created, if 2 or 3 then 1 or 2 new nodes is + being created */ + + /* fields that are used only for balancing leaves of the tree */ + int cur_blknum; /* number of empty blocks having been already allocated */ + int s0num; /* number of items that fall into left most node when S[0] splits */ + int s1num; /* number of items that fall into first new node when S[0] splits */ + int s2num; /* number of items that fall into second new node when S[0] splits */ + int lbytes; /* number of bytes which can flow to the left neighbor from the left */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int rbytes; /* number of bytes which will flow to the right neighbor from the right */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ + /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ + int s2bytes; + struct buffer_head * buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ + char * vn_buf; /* kmalloced memory. 
Used to create + virtual node and keep map of + dirtied bitmap blocks */ + int vn_buf_size; /* size of the vn_buf */ + struct virtual_node * tb_vn; /* VN starts after bitmap of bitmap blocks */ + + int fs_gen; /* saved value of `reiserfs_generation' counter + see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ +} ; + + +#if 0 + /* when balancing we potentially affect a 3 node wide column of nodes + in the tree (the top of the column may be tapered). C is the nodes + at the center of this column, and L and R are the nodes to the + left and right. */ + struct seal * L_path_seals[MAX_HEIGHT]; + struct seal * C_path_seals[MAX_HEIGHT]; + struct seal * R_path_seals[MAX_HEIGHT]; + char L_path_lock_types[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + char C_path_lock_types[MAX_HEIGHT]; + char R_path_lock_types[MAX_HEIGHT]; + + + struct seal_list_elem * C_seal[MAX_HEIGHT]; /* array of seals on nodes in the path */ + struct seal_list_elem * L_seal[MAX_HEIGHT]; /* array of seals on left neighbors of nodes in the path */ + struct seal_list_elem * R_seal[MAX_HEIGHT]; /* array of seals on right neighbors of nodes in the path*/ + struct seal_list_elem * FL_seal[MAX_HEIGHT]; /* array of seals on fathers of the left neighbors */ + struct seal_list_elem * FR_seal[MAX_HEIGHT]; /* array of seals on fathers of the right neighbors */ + struct seal_list_elem * CFL_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its left neighbor */ + struct seal_list_elem * CFR_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its right neighbor */ + + struct char C_desired_lock_type[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + struct char L_desired_lock_type[MAX_HEIGHT]; + struct char R_desired_lock_type[MAX_HEIGHT]; + struct char FL_desired_lock_type[MAX_HEIGHT]; + struct char FR_desired_lock_type[MAX_HEIGHT]; + struct char CFL_desired_lock_type[MAX_HEIGHT]; + struct char CFR_desired_lock_type[MAX_HEIGHT]; +#endif + + + + 
+ +/* These are modes of balancing */ + +/* When inserting an item. */ +#define M_INSERT 'i' +/* When inserting into (directories only) or appending onto an already + existant item. */ +#define M_PASTE 'p' +/* When deleting an item. */ +#define M_DELETE 'd' +/* When truncating an item or removing an entry from a (directory) item. */ +#define M_CUT 'c' + +/* used when balancing on leaf level skipped (in reiserfsck) */ +#define M_INTERNAL 'n' + +/* When further balancing is not needed, then do_balance does not need + to be called. */ +#define M_SKIP_BALANCING 's' +#define M_CONVERT 'v' + +/* modes of leaf_move_items */ +#define LEAF_FROM_S_TO_L 0 +#define LEAF_FROM_S_TO_R 1 +#define LEAF_FROM_R_TO_L 2 +#define LEAF_FROM_L_TO_R 3 +#define LEAF_FROM_S_TO_SNEW 4 + +#define FIRST_TO_LAST 0 +#define LAST_TO_FIRST 1 + +/* used in do_balance for passing parent of node information that has + been gotten from tb struct */ +struct buffer_info { + struct tree_balance * tb; + struct buffer_head * bi_bh; + struct buffer_head * bi_parent; + int bi_position; +}; + + +/* there are 4 types of items: stat data, directory item, indirect, direct. ++-------------------+------------+--------------+------------+ +| | k_offset | k_uniqueness | mergeable? 
| ++-------------------+------------+--------------+------------+ +| stat data | 0 | 0 | no | ++-------------------+------------+--------------+------------+ +| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | +| non 1st directory | hash value | | yes | +| item | | | | ++-------------------+------------+--------------+------------+ +| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object ++-------------------+------------+--------------+------------+ +| direct item | offset + 1 |TYPE_DIRECT | if this is not the first direct item of the object ++-------------------+------------+--------------+------------+ +*/ + +struct item_operations { + int (*bytes_number) (struct item_head * ih, int block_size); + void (*decrement_key) (struct cpu_key *); + int (*is_left_mergeable) (struct key * ih, unsigned long bsize); + void (*print_item) (struct item_head *, char * item); + void (*check_item) (struct item_head *, char * item); + + int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, + int is_affected, int insert_size); + int (*check_left) (struct virtual_item * vi, int free, + int start_skip, int end_skip); + int (*check_right) (struct virtual_item * vi, int free); + int (*part_size) (struct virtual_item * vi, int from, int to); + int (*unit_num) (struct virtual_item * vi); + void (*print_vi) (struct virtual_item * vi); +}; + + +extern struct item_operations stat_data_ops, indirect_ops, direct_ops, + direntry_ops; +extern struct item_operations * item_ops [4]; + +#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) +#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) +#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) +#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) +#define op_create_vi(vn,vi,is_affected,insert_size)
item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) +#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) +#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) +#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) +#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) +#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) + + + + + +#define COMP_KEYS comp_keys +#define COMP_SHORT_KEYS comp_short_keys +#define keys_of_same_object comp_short_keys + +/*#define COMP_KEYS(p_s_key1, p_s_key2) comp_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2)) +#define COMP_SHORT_KEYS(p_s_key1, p_s_key2) comp_short_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2))*/ + + +/* number of blocks pointed to by the indirect item */ +#define I_UNFM_NUM(p_s_ih) ( (p_s_ih)->ih_item_len / UNFM_P_SIZE ) + +/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ +#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? 
(size) - (ih)->u.ih_free_space : (size)) + +/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ + + +/* get the item header */ +#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get key */ +#define B_N_PDELIM_KEY(bh,item_num) ( (struct key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get the key */ +#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) + +/* get item body */ +#define B_N_PITEM(bh,item_num) ( (bh)->b_data + B_N_PITEM_HEAD((bh),(item_num))->ih_item_location) + +/* get the stat data by the buffer header and the item order */ +#define B_N_STAT_DATA(bh,nr) \ +( (struct stat_data *)((bh)->b_data+B_N_PITEM_HEAD((bh),(nr))->ih_item_location ) ) + + /* following defines use reiserfs buffer header and item header */ + +/* get stat-data */ +#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + (ih)->ih_item_location) ) + +// this is 3976 for size==4096 +#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) + +/* indirect items consist of entries which contain blocknrs, pos + indicates which entry, and B_I_POS_UNFM_POINTER resolves to the + blocknr contained by the entry pos points to */ +#define B_I_POS_UNFM_POINTER(bh,ih,pos) (*(((unsigned long *)B_I_PITEM(bh,ih)) + (pos))) + +/* Reiserfs buffer cache statistics. */ +#ifdef REISERFS_CACHE_STAT + struct reiserfs_cache_stat + { + int nr_reiserfs_ll_r_block; /* Number of block reads. */ + int nr_reiserfs_ll_w_block; /* Number of block writes. */ + int nr_reiserfs_schedule; /* Number of locked buffers waits. */ + unsigned long nr_reiserfs_bread; /* Number of calls to reiserfs_bread function */ + unsigned long nr_returns; /* Number of breads of buffers that were hoped to contain a key but did not after bread completed + (usually due to object shifting while bread was executing.) 
+ In the code this manifests as the number + of times that the repeat variable is nonzero in search_by_key.*/ + unsigned long nr_fixed; /* number of calls of fix_nodes function */ + unsigned long nr_failed; /* number of calls of fix_nodes in which schedule occurred while the function worked */ + unsigned long nr_find1; /* How many times we access a child buffer using its direct pointer from an internal node.*/ + unsigned long nr_find2; /* Number of times there is neither a direct pointer to + nor any entry in the child list pointing to the buffer. */ + unsigned long nr_find3; /* When parent is locked (meaning that there are no direct pointers) + or parent is leaf and buffer to be found is an unformatted node. */ + } cache_stat; +#endif + +struct reiserfs_iget4_args { + __u32 objectid ; +} ; + +/***************************************************************************/ +/* FUNCTION DECLARATIONS */ +/***************************************************************************/ + +/*#ifdef __KERNEL__*/ + +/* journal.c see journal.c for all the comments here */ + +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ + + +/* first block written in a commit. */ +struct reiserfs_journal_desc { + __u32 j_trans_id ; /* id of commit */ + __u32 j_len ; /* length of commit. len +1 is the commit block */ + __u32 j_mount_id ; /* mount id of this trans*/ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_magic[12] ; +} ; + +/* last block written in a commit */ +struct reiserfs_journal_commit { + __u32 j_trans_id ; /* must match j_trans_id from the desc block */ + __u32 j_len ; /* ditto */ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_digest[16] ; /* md5 sum of all the blocks involved, including desc and commit. 
not used, kill it */ +} ; + +/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the +** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, +** and this transaction does not need to be replayed. +*/ +struct reiserfs_journal_header { + __u32 j_last_flush_trans_id ; /* id of last fully flushed transaction */ + __u32 j_first_unflushed_offset ; /* offset in the log of where to start replay after a crash */ + __u32 j_mount_id ; +} ; + +/* these are used to keep flush pages that contain converted direct items. +** if the page is not flushed before the transaction that converted it +** is committed, we risk losing data +** +** note, while a page is in this list, its counter is incremented. +*/ +struct reiserfs_page_list { + struct reiserfs_page_list *next ; + struct reiserfs_page_list *prev ; + struct page *page ; + unsigned long blocknr ; /* block number holding converted data */ + + /* if a transaction writer has the page locked the flush_page_list + ** function doesn't need to (and can't) get the lock while flushing + ** the page. do_not_lock needs to be set by anyone who calls journal_end + ** with a page lock held. They have to look in the inode and see + ** if the inode has the page they have locked in the flush list. + ** + ** this sucks.
+ */ + int do_not_lock ; +} ; + +extern task_queue reiserfs_commit_thread_tq ; +extern task_queue reiserfs_end_io_tq ; +extern wait_queue_head_t reiserfs_commit_thread_wait ; + +/* biggest tunable defines are right here */ +#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ +#define JOURNAL_MAX_BATCH 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ +#define JOURNAL_MAX_COMMIT_AGE 30 +#define JOURNAL_MAX_TRANS_AGE 30 +#define JOURNAL_PER_BALANCE_CNT 12 /* must be >= (5 + 2 * (MAX_HEIGHT-2) + 1) */ + +/* both of these can be as low as 1, or as high as you want. The min is the +** number of 4k bitmap nodes preallocated on mount. New nodes are allocated +** as needed, and released when transactions are committed. On release, if +** the current number of nodes is > max, the node is freed, otherwise, +** it is put on a free list for faster use later. +*/ +#define REISERFS_MIN_BITMAP_NODES 10 +#define REISERFS_MAX_BITMAP_NODES 100 + +#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ +#define JBH_HASH_MASK 8191 + +/* After several hours of tedious analysis, the following hash + * function won. Do not mess with it... -DaveM + */ +#define _jhashfn(dev,block) \ + ((((dev)<<(JBH_HASH_SHIFT - 6)) ^ ((dev)<<(JBH_HASH_SHIFT - 9))) ^ \ + (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) +#define journal_hash(t,dev,block) ((t)[_jhashfn((dev),(block)) & JBH_HASH_MASK]) + +/* finds n'th buffer with 0 being the start of this commit. Needs to go away, j_ap_blocks has changed +** since I created this. 
One chunk of code in journal.c needs changing before deleting it +*/ +#define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT]) + +void reiserfs_check_lock_depth(char *caller) ; +void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ; +void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ; +int journal_init(struct super_block *) ; +int journal_release(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_release_error(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_end(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, unsigned long blocknr) ; +int push_journal_writer(char *w) ; +int pop_journal_writer(int windex) ; +int journal_lock_dobalance(struct super_block *p_s_sb) ; +int journal_unlock_dobalance(struct super_block *p_s_sb) ; +int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; +int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, unsigned long bl, int size, int searchall, unsigned long *next) ; +int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +int journal_join(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +struct super_block *reiserfs_get_super(kdev_t dev) ; +void flush_async_commits(struct super_block *p_s_sb) ; + +int remove_from_transaction(struct super_block *p_s_sb, unsigned long blocknr, int already_cleaned) ; +int remove_from_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_head *bh, int remove_freed) ; + +int 
buffer_journaled(struct buffer_head *bh) ; +int mark_buffer_journal_new(struct buffer_head *bh) ; +int reiserfs_sync_all_buffers(kdev_t dev, int wait) ; +int reiserfs_sync_buffers(kdev_t dev, int wait) ; +int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *, + struct inode *, struct buffer_head *) ; +int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *, + struct inode *) ; + +int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, int) ; + +static inline int reiserfs_buffer_prepared(struct buffer_head *bh) { + if (bh && test_bit(BH_JPrepared, &bh->b_state)) + return 1 ; + else + return 0 ; +} + +/* buffer was journaled, waiting to get to disk */ +static inline int buffer_journal_dirty(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JDirty_wait, &bh->b_state) ; + else + return 0 ; +} +static inline int mark_buffer_notjournal_dirty(struct buffer_head *bh) { + if (bh) + clear_bit(BH_JDirty_wait, &bh->b_state) ; + return 0 ; +} +static inline int mark_buffer_notjournal_new(struct buffer_head *bh) { + if (bh) { + clear_bit(BH_JNew, &bh->b_state) ; + } + return 0 ; +} + +/* objectid.c */ +__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th); +void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, __u32 objectid_to_release); +int reiserfs_convert_objectid_map_v1(struct super_block *) ; + +/* stree.c */ +int B_IS_IN_TREE(struct buffer_head *); +extern inline void copy_key (void * to, void * from); +extern inline void copy_short_key (void * to, void * from); +extern inline void copy_item_head(void * p_v_to, void * p_v_from); + +// first key is in cpu form, second - le +extern inline int comp_keys (struct key * le_key, struct cpu_key * cpu_key); +extern inline int comp_short_keys (struct key * le_key, struct cpu_key * cpu_key); +extern inline void le_key2cpu_key (struct cpu_key * to, struct key * from); + +// both are cpu keys +extern inline int 
comp_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline int comp_short_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline void cpu_key2cpu_key (struct cpu_key *, struct cpu_key *); + +// both are in le form +extern inline int comp_le_keys (struct key *, struct key *); +extern inline int comp_short_le_keys (struct key *, struct key *); + +// +// get key version from on disk key - kludge +// +extern inline int le_key_version (struct key * key) +{ + int type; + + type = le16_to_cpu (key->u.k_offset_v2.k_type); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) + return ITEM_VERSION_1; + + return ITEM_VERSION_2; + +} + + +extern inline void copy_key (void * to, void * from) +{ + memcpy (to, from, KEY_SIZE); +} + + +int comp_items (struct item_head * p_s_ih, struct path * p_s_path); +struct key * get_rkey (struct path * p_s_chk_path, struct super_block * p_s_sb); +inline int bin_search (void * p_v_key, void * p_v_base, int p_n_num, int p_n_width, int * p_n_pos); +int search_by_key (struct super_block *, struct cpu_key *, struct path *, int); +#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) +int search_for_position_by_key (struct super_block * p_s_sb, struct cpu_key * p_s_cpu_key, struct path * p_s_search_path); +extern inline void decrement_bcount (struct buffer_head * p_s_bh); +void decrement_counters_in_path (struct path * p_s_search_path); +void pathrelse (struct path * p_s_search_path); +int reiserfs_check_path(struct path *p) ; +void pathrelse_and_restore (struct super_block *s, struct path * p_s_search_path); + +int reiserfs_insert_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct item_head * ih, const char * body); + +int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + const char * body, int paste_size); + +int reiserfs_cut_from_item (struct 
reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + struct page *page, + loff_t new_file_size); + +int reiserfs_delete_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + struct buffer_head * p_s_un_bh); + + +void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); +void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, struct page *, + int update_timestamps); +// +void reiserfs_vfs_truncate_file (struct inode * p_s_inode); +//void lock_inode_to_convert (struct inode * p_s_inode); +//void unlock_inode_after_convert (struct inode * p_s_inode); +//void increment_i_read_sync_counter (struct inode * p_s_inode); +//void decrement_i_read_sync_counter (struct inode * p_s_inode); + + +#define block_size(inode) ((inode)->i_sb->s_blocksize) +#define file_size(inode) ((inode)->i_size) +#define tail_size(inode) (file_size (inode) & (block_size (inode) - 1)) + +#define tail_has_to_be_packed(inode) (!dont_have_tails ((inode)->i_sb) &&\ +!STORE_TAIL_IN_UNFM(file_size (inode), tail_size(inode), block_size (inode))) + +/* +int get_buffer_by_range (struct super_block * p_s_sb, struct key * p_s_range_begin, struct key * p_s_range_end, + struct buffer_head ** pp_s_buf, unsigned long * p_n_objectid); +int get_buffers_from_range (struct super_block * p_s_sb, struct key * p_s_range_start, struct key * p_s_range_end, + struct buffer_head ** p_s_range_buffers, + int n_max_nr_buffers_to_return); +*/ + +#ifndef REISERFS_FSCK + +//inline int is_left_mergeable (struct item_head * ih, unsigned long bsize); + +#else + +int is_left_mergeable (struct super_block * s, struct path * path); +int is_right_mergeable (struct super_block * s, struct path * path); +int are_items_mergeable (struct item_head * left, struct item_head * right, int bsize); + +#endif +void padd_item (char * item, int total_length, 
int length); + + +/* inode.c */ + +void reiserfs_truncate_file(struct inode *) ; +void make_cpu_key (struct cpu_key * cpu_key, const struct inode * inode, loff_t offset, + int type, int key_length); +void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version, + loff_t offset, int type, int length, int entry_count); +/*void store_key (struct key * key); +void forget_key (struct key * key);*/ +int reiserfs_get_block (struct inode * inode, long block, + struct buffer_head * bh_result, int create); +struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key); +void reiserfs_read_inode (struct inode * inode) ; +void reiserfs_read_inode2(struct inode * inode, void *p) ; +void reiserfs_delete_inode (struct inode * inode); +extern int reiserfs_notify_change(struct dentry * dentry, struct iattr * attr); +void reiserfs_write_inode (struct inode * inode, int) ; + +/* we don't mark inodes dirty, we just log them */ +static inline void reiserfs_dirty_inode (struct inode * inode) { + reiserfs_write_inode(inode, 0) ; +} + +struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, const struct inode * dir, int mode, + const char * symname, int item_len, + struct dentry *dentry, struct inode *inode, int * err); +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); +void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); +int reiserfs_inode_setattr(struct dentry *, struct iattr * attr); + +/* namei.c */ +inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de); +int search_by_entry_key (struct super_block * sb, struct cpu_key * key, struct path * path, + struct reiserfs_dir_entry * de); +struct dentry * reiserfs_lookup (struct inode * dir, struct dentry *dentry); +int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_mknod (struct inode * dir_inode, struct dentry *dentry, int mode, int rdev); +int reiserfs_mkdir 
(struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_rmdir (struct inode * dir, struct dentry *dentry); +int reiserfs_unlink (struct inode * dir, struct dentry *dentry); +int reiserfs_symlink (struct inode * dir, struct dentry *dentry, const char * symname); +int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry); +int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry); + +/* super.c */ +inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag); +inline void reiserfs_mark_buffer_clean (struct buffer_head * bh); +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_write_super (struct super_block * s); +void reiserfs_put_super (struct super_block * s); +int reiserfs_remount (struct super_block * s, int * flags, char * data); +/*int read_super_block (struct super_block * s, int size); +int read_bitmaps (struct super_block * s); +int read_old_bitmaps (struct super_block * s); +int read_old_super_block (struct super_block * s, int size);*/ +struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent); +int reiserfs_statfs (struct super_block * s, struct statfs * buf); + +/* dir.c */ +extern struct inode_operations reiserfs_dir_inode_operations; +extern struct file_operations reiserfs_dir_operations; + +/* tail_conversion.c */ +int direct2indirect (struct reiserfs_transaction_handle *, struct inode *, struct path *, struct buffer_head *, loff_t); +int indirect2direct (struct reiserfs_transaction_handle *, struct inode *, struct page *, struct path *, struct cpu_key *, loff_t, char *); +void reiserfs_unmap_buffer(struct buffer_head *) ; + + +/* file.c */ +extern struct inode_operations reiserfs_file_inode_operations; +extern struct file_operations reiserfs_file_operations; +extern struct address_space_operations reiserfs_address_space_operations ; +int get_new_buffer (struct 
reiserfs_transaction_handle *th, struct buffer_head *, + struct buffer_head **, struct path *); + + +/* buffer2.c */ +struct buffer_head * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size); +void wait_buffer_until_released (struct buffer_head * bh); +struct buffer_head * reiserfs_bread (kdev_t n_dev, int n_block, int n_size); + + +/* fix_nodes.c */ +void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s); +void reiserfs_kfree (const void * vp, size_t size, struct super_block * s); +int fix_nodes (int n_op_mode, struct tree_balance * p_s_tb, struct item_head * p_s_ins_ih, const void *); +void unfix_nodes (struct tree_balance *); +void free_buffers_in_tb (struct tree_balance * p_s_tb); + + +/* prints.c */ +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_warning (const char * fmt, ...); +void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...); +void print_virtual_node (struct virtual_node * vn); +void print_indirect_item (struct buffer_head * bh, int item_num); +void store_print_tb (struct tree_balance * tb); +void print_cur_tb (char * mes); +void print_de (struct reiserfs_dir_entry * de); +void print_bi (struct buffer_info * bi, char * mes); +#define PRINT_LEAF_ITEMS 1 /* print all items */ +#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ +#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ +void print_block (struct buffer_head * bh, ...); +void print_path (struct tree_balance * tb, struct path * path); +void print_bmap (struct super_block * s, int silent); +void print_bmap_block (int i, char * data, int size, int silent); +/*void print_super_block (struct super_block * s, char * mes);*/ +void print_objectid_map (struct super_block * s); +void print_block_head (struct buffer_head * bh, char * mes); +void check_leaf (struct buffer_head * bh); +void check_internal (struct buffer_head * bh); +void print_statistics (struct super_block * s); + +/* lbalance.c */ +int 
leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew); +int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes); +int leaf_shift_right (struct tree_balance * tb, int shift_num, int shift_bytes); +void leaf_delete_items (struct buffer_info * cur_bi, int last_first, int first, int del_num, int del_bytes); +void leaf_insert_into_buf (struct buffer_info * bi, int before, + struct item_head * inserted_item_ih, const char * inserted_item_body, int zeros_number); +void leaf_paste_in_buffer (struct buffer_info * bi, int pasted_item_num, + int pos_in_item, int paste_size, const char * body, int zeros_number); +void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, int pos_in_item, + int cut_size); +void leaf_paste_entries (struct buffer_head * bh, int item_num, int before, + int new_entry_count, struct reiserfs_de_head * new_dehs, const char * records, int paste_size); +/* ibalance.c */ +int balance_internal (struct tree_balance * , int, int, struct item_head * , + struct buffer_head **); + +/* do_balance.c */ +inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, + struct buffer_head * bh, int flag); +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + +void do_balance (struct tree_balance * tb, struct item_head * ih, + const char * body, int flag); +void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh); + +int get_left_neighbor_position (struct tree_balance * tb, int h); +int get_right_neighbor_position (struct tree_balance * tb, int h); +void replace_key (struct tree_balance * tb, struct buffer_head *, int, struct buffer_head *, int); +void replace_lkey (struct tree_balance *, int, struct item_head *); +void replace_rkey (struct tree_balance *, int, struct item_head *); +void make_empty_node (struct buffer_info *); +struct buffer_head * get_FEB (struct 
tree_balance *); + +/* bitmap.c */ +int is_reusable (struct super_block * s, unsigned long block, int bit_value); +void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long); +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknrs, unsigned long start_from, int amount_needed); +int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknr, unsigned long start_from); +#ifdef REISERFS_PREALLOCATE +int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, + struct inode * inode, + unsigned long * pblocknr, + unsigned long start_from); + +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode); +#endif + +/* hashes.c */ +__u32 keyed_hash (const char *msg, int len); +__u32 yura_hash (const char *msg, int len); +__u32 r5_hash (const char *msg, int len); + +/* version.c */ +char *reiserfs_get_version_string(void) ; + +/* the ext2 bit routines handle little endian bit operations when used +** on big endian machines. These must be used when changing on disk +** bitmaps. 
+*/ +#define reiserfs_test_and_set_le_bit ext2_set_bit +#define reiserfs_test_and_clear_le_bit ext2_clear_bit +#define reiserfs_test_le_bit ext2_test_bit +#define reiserfs_find_next_zero_le_bit ext2_find_next_zero_bit + + +// +// this was totally copied from from linux's +// find_first_zero_bit and changed a bit +// + +#ifdef __i386__ + +extern __inline__ int +find_first_nonzero_bit(void * addr, unsigned size) { + int res; + int __d0; + void *__d1; + + + if (!size) { + return (0); + } + __asm__ __volatile__ ( + "cld\n\t" + "xorl %%eax,%%eax\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "movl -4(%%edi),%%eax\n\t" + "subl $4, %%edi\n\t" + "bsfl %%eax,%%eax\n\t" + "1:\tsubl %%edx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%eax" + :"=a" (res), + "=c"(__d0), "=D"(__d1) + :"1" ((size + 31) >> 5), "d" (addr), "2" (addr)); + return (res); +} + +#else /* __i386__ */ + +extern __inline__ int find_next_nonzero_bit(void * addr, unsigned size, unsigned offset) +{ + unsigned int * p = ((unsigned int *) addr) + (offset >> 5); + unsigned int result = offset & ~31UL; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31UL; + if (offset) { + tmp = *p++; + /* set to zero first offset bits */ + tmp &= ~(~0UL >> (32-offset)); + if (size < 32) + goto found_first; + if (tmp != 0U) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = *p++) != 0U) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = *p; +found_first: +found_middle: + return result + ffs(tmp); +} + +#define find_first_nonzero_bit(addr,size) find_next_nonzero_bit((addr), (size), 0) + +#endif /* 0 */ + + /* This has no comment explaining that + it is space reserved for fsck, nor + does it give any guidance to + palmtop folks on how much they can + get away with reducing + this. Finally, why is it in this + file, what does it have to do with + super block operations? 
Actually, + an even better question might be, + why is this file separate from + reiserfs_fs.h at all? Then let us + ask, why is that named + reiserfs_fs.h rather than + reiser_fs.h? -Hans */ +/* sometimes reiserfs_truncate may require to allocate few new blocks + to perform indirect2direct conversion. People probably used to + think, that truncate should work without problems on a filesystem + without free disk space. They may complain that they can not + truncate due to lack of free disk space. This space space allows us + to not worry about it. 500 is probably to much, but it should be + absolutely safe */ +#define SPARE_SPACE 500 + +extern inline unsigned long reiserfs_get_journal_block(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_journal_block) ; +} +extern inline unsigned long reiserfs_get_journal_orig_size(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_orig_journal_size) ; +} + +/* prototypes from ioctl.c */ +int reiserfs_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); +int reiserfs_unpack (struct inode * inode, struct file * filp); + +/* ioctl's command */ +#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) + +#endif /* _LINUX_REISER_FS_H */ + + + + + diff -u -r --new-file linux/include/linux/reiserfs_fs_i.h v2.4.0-test8/linux/include/linux/reiserfs_fs_i.h --- linux/include/linux/reiserfs_fs_i.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/include/linux/reiserfs_fs_i.h Thu Sep 14 13:07:11 2000 @@ -0,0 +1,56 @@ +#ifndef _REISER_FS_I +#define _REISER_FS_I + +#define REISERFS_N_BLOCKS 10 + +/* these are used to keep track of the pages that need +** flushing before the current transaction can commit +*/ +struct reiserfs_page_list ; + +struct reiserfs_inode_info { + //struct pipe_inode_info reserved; + __u32 i_key [4];/* key is still 4 32 bit integer */ + + int i_version; // this says whether file is old or new + + int i_pack_on_close ; // file might need tail packing on 
close + + __u32 i_first_direct_byte; // offset of first byte stored in direct item. + + /* pointer to the page that must be flushed before + ** the current transaction can commit. + ** + ** this pointer is only used when the tail is converted back into + ** a direct item, or the file is deleted + */ + struct reiserfs_page_list *i_converted_page ; + + /* we save the id of the transaction when we did the direct->indirect + ** conversion. That allows us to flush the buffers to disk + ** without having to upate this inode to zero out the converted + ** page variable + */ + int i_conversion_trans_id ; + + /* So is this an extent, or what? What + happens when the disk is badly + fragmented? I would prefer a list + of blocks, and I would prefer that + you simply take the first + PREALLOC_SIZE blocks after + search_start and put them on the + list, contiguous or not. Maybe I + don't fully understand this code. I + really prefer allocate on flush + conceptually..... -Hans */ + //For preallocation + int i_prealloc_block; + int i_prealloc_count; + + //nopack-attribute + int nopack; +}; + + +#endif diff -u -r --new-file linux/include/linux/reiserfs_fs_sb.h v2.4.0-test8/linux/include/linux/reiserfs_fs_sb.h --- linux/include/linux/reiserfs_fs_sb.h Thu Jan 1 03:00:00 1970 +++ v2.4.0-test8/linux/include/linux/reiserfs_fs_sb.h Sun Aug 20 02:38:39 2000 @@ -0,0 +1,362 @@ +/* Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing + * and copyright details */ + +#ifndef _LINUX_REISER_FS_SB +#define _LINUX_REISER_FS_SB + +#include <linux/tqueue.h> + +// +// super block's field values +// +/*#define REISERFS_VERSION 0 undistributed bitmap */ +/*#define REISERFS_VERSION 1 distributed bitmap and resizer*/ +#define REISERFS_VERSION_2 2 /* distributed bitmap, resizer, 64-bit, etc*/ +#define UNSET_HASH 0 // read_super will guess about, what hash names + // in directories were sorted with +#define TEA_HASH 1 +#define YURA_HASH 2 +#define R5_HASH 3 +#define DEFAULT_HASH R5_HASH + 
+/* this is the on disk super block */ + +struct reiserfs_super_block +{ + __u32 s_block_count; + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + + /* Since journal size is currently a #define in a header file, if + ** someone creates a disk with a 16MB journal and moves it to a + ** system with 32MB journal default, they will overflow their journal + ** when they mount the disk. s_orig_journal_size, plus some checks + ** while mounting (inside journal_init) prevent that from happening + */ + + __u32 s_orig_journal_size; + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[12]; /* reiserfs magic string indicates that file system is reiserfs */ + __u32 s_hash_function_code; /* indicate, what hash fuction is being use to sort names in a directory*/ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u16 s_version; /* I'd prefer it if this was a string, + something like "3.6.4", and maybe + 16 bytes long mostly unused. We + don't need to save bytes in the + superblock. 
-Hans */ + char s_unused[128] ; /* zero filled by mkreiserfs */ +}; + +#define SB_SIZE (sizeof(struct reiserfs_super_block)) + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 +{ + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + __u32 s_orig_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u16 s_reserved; +}; + +#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) + +/* LOGGING -- */ + +/* These all interelate for performance. +** +** If the journal block count is smaller than n transactions, you lose speed. +** I don't know what n is yet, I'm guessing 8-16. +** +** typical transaction size depends on the application, how often fsync is +** called, and how many metadata blocks you dirty in a 30 second period. +** The more small files (<16k) you use, the larger your transactions will +** be. 
+** +** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal +** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough +** to prevent wrapping before dirty meta blocks get to disk. +** +** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal +** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. +** +** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. +** +*/ + +/* don't mess with these for a while */ + /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ +#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ +#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ +#define JOURNAL_TRANS_MAX 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ +#define JOURNAL_HASH_SIZE 8192 +#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ +#define JOURNAL_LIST_COUNT 64 + +/* these are bh_state bit flag offset numbers, for use in the buffer head */ + +#define BH_JDirty 16 /* journal data needs to be written before buffer can be marked dirty */ +#define BH_JDirty_wait 18 /* commit is done, buffer marked dirty */ +#define BH_JNew 19 /* buffer allocated during this transaction, no need to write if freed during this trans too */ + +/* ugly. metadata blocks must be prepared before they can be logged. +** prepared means unlocked and cleaned. If the block is prepared, but not +** logged for some reason, any bits cleared while preparing it must be +** set again. +*/ +#define BH_JPrepared 20 /* block has been prepared for the log */ +#define BH_JRestore_dirty 22 /* restore the dirty bit later */ + +/* One of these for every block in every transaction +** Each one is in two hash tables. 
First, a hash of the current transaction, and after journal_end, a +** hash of all the in memory transactions. +** next and prev are used by the current transaction (journal_hash). +** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash +** links it in multiple times. This allows the end_io handler, and flush_journal_list to remove just the cnode belonging +** to a given transaction. +*/ +struct reiserfs_journal_cnode { + struct buffer_head *bh ; /* real buffer head */ + kdev_t dev ; /* dev of real buffer head */ + unsigned long blocknr ; /* block number of real buffer head, == 0 when buffer on disk */ + int state ; + struct reiserfs_journal_list *jlist ; /* journal list this cnode lives in */ + struct reiserfs_journal_cnode *next ; /* next in transaction list */ + struct reiserfs_journal_cnode *prev ; /* prev in transaction list */ + struct reiserfs_journal_cnode *hprev ; /* prev in hash list */ + struct reiserfs_journal_cnode *hnext ; /* next in hash list */ +}; + +struct reiserfs_bitmap_node { + int id ; + char *data ; + struct list_head list ; +} ; + +struct reiserfs_list_bitmap { + struct reiserfs_journal_list *journal_list ; + struct reiserfs_bitmap_node **bitmaps ; +} ; + +/* +** transaction handle which is passed around for all journal calls +*/ +struct reiserfs_transaction_handle { + char *t_caller ; /* debugging use */ + int t_blocks_logged ; /* number of blocks this writer has logged */ + int t_blocks_allocated ; /* number of blocks this writer allocated */ + unsigned long t_trans_id ; /* sanity check, equals the current trans id */ + struct super_block *t_super ; /* super for this FS when journal_begin was + called. saves calls to reiserfs_get_super */ + +} ; + +/* +** one of these for each transaction. The most important part here is the j_realblock. 
+** this list of cnodes is used to hash all the blocks in all the commits, to mark all the +** real buffer heads dirty once all the commits hit the disk, +** and to make sure every real block in a transaction is on disk before allowing the log area +** to be overwritten */ +struct reiserfs_journal_list { + unsigned long j_start ; + unsigned long j_len ; + atomic_t j_nonzerolen ; + atomic_t j_commit_left ; + atomic_t j_flushing ; + atomic_t j_commit_flushing ; + atomic_t j_older_commits_done ; /* all commits older than this on disk*/ + unsigned long j_trans_id ; + time_t j_timestamp ; + struct reiserfs_list_bitmap *j_list_bitmap ; + struct buffer_head *j_commit_bh ; /* commit buffer head */ + struct reiserfs_journal_cnode *j_realblock ; + struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */ + wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */ + wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */ +} ; + +struct reiserfs_page_list ; /* defined in reiserfs_fs.h */ + +struct reiserfs_journal { + struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */ + struct reiserfs_journal_cnode *j_last ; /* newest journal block */ + struct reiserfs_journal_cnode *j_first ; /* oldest journal block. start here for traverse */ + + int j_state ; + unsigned long j_trans_id ; + unsigned long j_mount_id ; + unsigned long j_start ; /* start of current waiting commit (index into j_ap_blocks) */ + unsigned long j_len ; /* length of current waiting commit */ + unsigned long j_len_alloc ; /* number of buffers requested by journal_begin() */ + atomic_t j_wcount ; /* count of writers for current commit */ + unsigned long j_bcount ; /* batch count. 
allows turning X transactions into 1 */ + unsigned long j_first_unflushed_offset ; /* first unflushed transactions offset */ + unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */ + struct buffer_head *j_header_bh ; + + /* j_flush_pages must be flushed before the current transaction can + ** commit + */ + struct reiserfs_page_list *j_flush_pages ; + time_t j_trans_start_time ; /* time this transaction started */ + wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */ + atomic_t j_wlock ; /* lock for j_wait */ + wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */ + atomic_t j_jlock ; /* lock for j_join_wait */ + int j_journal_list_index ; /* journal list number of the current trans */ + int j_list_bitmap_index ; /* number of next list bitmap to use */ + int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */ + int j_next_full_flush ; /* next journal_end will flush all journal list */ + int j_next_async_flush ; /* next journal_end will flush all async commits */ + + int j_cnode_used ; /* number of cnodes on the used list */ + int j_cnode_free ; /* number of cnodes on the free list */ + + struct reiserfs_journal_cnode *j_cnode_free_list ; + struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */ + + int j_free_bitmap_nodes ; + int j_used_bitmap_nodes ; + struct list_head j_bitmap_nodes ; + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */ + struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all + the transactions */ +}; + +#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. 
magic string to find desc blocks in the journal */ + + +typedef __u32 (*hashf_t) (const char *, int); + +/* reiserfs union of in-core super block data */ +struct reiserfs_sb_info +{ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap */ + struct reiserfs_journal *s_journal ; /* pointer to journal information */ + unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + + /* Comment? -Hans */ + void (*end_io_handler)(struct buffer_head *, int); + hashf_t s_hash_function; /* pointer to function which is used + to sort names in directory. Set on + mount */ + unsigned long s_mount_opt; /* reiserfs's mount options are set + here (currently - NOTAIL, NOLOG, + REPLAYONLY) */ + + /* Comment? -Hans */ + wait_queue_head_t s_wait; + /* To be obsoleted soon by per buffer seals.. -Hans */ + atomic_t s_generation_counter; // increased by one every time the + // tree gets re-balanced + + /* session statistics */ + int s_kmallocs; + int s_disk_reads; + int s_disk_writes; + int s_fix_nodes; + int s_do_balance; + int s_unneeded_left_neighbor; + int s_good_search_by_key_reada; + int s_bmaps; + int s_bmaps_without_search; + int s_direct2indirect; + int s_indirect2direct; +}; + + +#define NOTAIL 0 /* -o notail: no tails will be created in a session */ +#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ +#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ +#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old + format super block to the new + format. If not specified - old + partition will be dealt with in a + manner of 3.5.x */ + +/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting +** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option +** is not required. 
If the normal autodetection code can't determine which +** hash to use (because both hashes had the same value for a file) +** use this option to force a specific hash. It won't allow you to override +** the existing hash on the FS, so if you have a tea hash disk, and mount +** with -o hash=rupasov, the mount will fail. +*/ +#define FORCE_TEA_HASH 6 /* try to force tea hash on mount */ +#define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */ +#define FORCE_R5_HASH 8 /* try to force r5 hash on mount */ +#define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */ + +#define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH)) +#define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH)) +#define reiserfs_tea_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_TEA_HASH)) +#define reiserfs_hash_detect(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_HASH_DETECT)) + +#define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL)) +#define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REPLAYONLY)) +#define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG)) +#define old_format_only(s) ((SB_VERSION(s) != REISERFS_VERSION_2) && !((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_CONVERT))) + + +void reiserfs_file_buffer (struct buffer_head * bh, int list); +int reiserfs_is_super(struct super_block *s) ; +int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int flush_old_commits(struct super_block *s, int) ; +int show_reiserfs_locks(void) ; +void reiserfs_end_buffer_io_sync (struct buffer_head *bh, int uptodate) ; +void reiserfs_journal_end_io(struct buffer_head *bh, int uptodate) ; +int reiserfs_resize(struct super_block *, unsigned long) ; + +#define CARRY_ON 0 +#define SCHEDULE_OCCURRED 1 + + +#define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh) +#define SB_JOURNAL(s) 
((s)->u.reiserfs_sb.s_journal) +#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list) +#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) +#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) +#define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap) + + +// on-disk super block fields converted to cpu form +#define SB_DISK_SUPER_BLOCK(s) ((s)->u.reiserfs_sb.s_rs) +#define SB_BLOCK_COUNT(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_block_count)) +#define SB_FREE_BLOCKS(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_free_blocks)) +#define SB_REISERFS_MAGIC(s) (SB_DISK_SUPER_BLOCK(s)->s_magic) +#define SB_ROOT_BLOCK(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_root_block)) +#define SB_TREE_HEIGHT(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_tree_height)) +#define SB_REISERFS_STATE(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_state)) +#define SB_VERSION(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_version)) +#define SB_BMAP_NR(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_bmap_nr)) + +#endif /* _LINUX_REISER_FS_SB */