 /*- 7  * See the file LICENSE for redistribution information.   *!  * Copyright (c) 1996, 1997, 1998 ,  *	Sleepycat Software.  All rights reserved.  */    #include "config.h"    #ifndef lintJ static const char sccsid[] = "@(#)db_region.c	10.53 (Sleepycat) 11/10/98"; #endif /* not lint */    #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h>   #include <errno.h> #include <string.h>  #include <unistd.h>  #endif   #include "db_int.h"  #include "common_ext.h"   4 static int __db_growregion __P((REGINFO *, size_t));   /*  * __db_rattach --:  *	Optionally create and attach to a shared memory region.  *-  * PUBLIC: int __db_rattach __P((REGINFO *));   */  int  __db_rattach(infop)  	REGINFO *infop; {  	RLAYOUT *rlp, rl; 	size_t grow_region, size; 	ssize_t nr, nw;  	u_int32_t flags, mbytes, bytes;
 	u_int8_t *p; % 	int malloc_possible, ret, retry_cnt;    	grow_region = 0;  	malloc_possible = 1;  	ret = retry_cnt = 0;   > 	/* Round off the requested size to the next page boundary. */) 	DB_ROUNDOFF(infop->size, DB_VMPAGESIZE);   F 	/* Some architectures have hard limits on the maximum region size. */ #ifdef DB_REGIONSIZE_MAX' 	if (infop->size > DB_REGIONSIZE_MAX) { ? 		__db_err(infop->dbenv, "__db_rattach: cache size too large");  		return (EINVAL); 	} #endif  A 	/* Intialize the return information in the REGINFO structure. */  loop:	infop->addr = NULL;  	infop->fd = -1; 	infop->segid = INVALID_SEGID; 	if (infop->name != NULL) {  		__os_freestr(infop->name); 		infop->name = NULL;  	}/ 	F_CLR(infop, REGION_CANGROW | REGION_CREATED);    #ifndef HAVE_SPINLOCKS 	/*  	 * XXX B 	 * Lacking spinlocks, we must have a file descriptor for fcntl(2)B 	 * locking, which implies using mmap(2) to map in a regular file.C 	 * (Theoretically, we could probably get a file descriptor to lock @ 	 * other types of shared regions, but I don't see any reason to 	 * bother.) 	 * A 	 * Since we may be using shared memory regions, e.g., shmget(2), A 	 * and not mmap of regular files, the backing file may be only a A 	 * few tens of bytes in length.  So, this depends on the ability B 	 * to fcntl lock file offsets much larger than the physical file. 	 */ 	malloc_possible = 0;  #endif  
 #ifdef __hppa  	/*  	 * XXX E 	 * HP-UX won't permit mutexes to live in anything but shared memory. F 	 * Instantiate a shared region file on that architecture, regardless. 	 */ 	malloc_possible = 0;  #endif 	/* C 	 * If a region is truly private, malloc the memory.  That's faster 2 	 * than either anonymous memory or a shared file. 	 */9 	if (malloc_possible && F_ISSET(infop, REGION_PRIVATE)) { @ 		if ((ret = __os_malloc(infop->size, NULL, &infop->addr)) != 0) 			return (ret);   		/*@ 		 * It's sometimes significantly faster to page-fault in all of@ 		 * the region's pages before we run the application, as we see@ 		 * nasty side-effects when we page-fault while holding various? 		 * locks, i.e., the lock takes a long time to acquire because ? 		 * of the underlying page fault, and the other threads convoy  		 * behind the lock holder. 		 */   		if (DB_GLOBAL(db_region_init)) 			for (p = infop->addr;1 			    p < (u_int8_t *)infop->addr + infop->size;  			    p += DB_VMPAGESIZE) 				p[0] = '\0';  / 		F_SET(infop, REGION_CREATED | REGION_MALLOC);  		goto region_init;  	}   	/* E 	 * Get the name of the region (creating the file if a temporary file C 	 * is being used).  The dbenv contains the current DB environment, E 	 * including naming information.  The path argument may be a file or G 	 * a directory.  If path is a directory, it must exist and file is the E 	 * file name to be created inside the directory.  If path is a file,  	 * then file must be NULL.  	 */C 	if ((ret = __db_appname(infop->dbenv, infop->appname, infop->path, B 	    infop->file, infop->dbflags, &infop->fd, &infop->name)) != 0) 		return (ret);  	if (infop->fd != -1)  		F_SET(infop, REGION_CREATED);    	/* G 	 * Try to create the file, if we have authority.  We have to make sure G 	 * that multiple threads/processes attempting to simultaneously create F 	 * the region are properly ordered, so we open it using DB_CREATE andH 	 * DB_EXCL, so two attempts to create the region will return failure in 	 * one. 	 */5 	if (infop->fd == -1 && infop->dbflags & DB_CREATE) {  		flags = infop->dbflags;  		LF_SET(DB_EXCL);# 		if ((ret = __db_open(infop->name, 3 		    flags, flags, infop->mode, &infop->fd)) == 0)   			F_SET(infop, REGION_CREATED); 		else 			if (ret != EEXIST)  				goto errmsg; 	}  7 	/* If we couldn't create the file, try and open it. */  	if (infop->fd == -1) {  		flags = infop->dbflags;  		LF_CLR(DB_CREATE | DB_EXCL);# 		if ((ret = __db_open(infop->name, 3 		    flags, flags, infop->mode, &infop->fd)) != 0)  			goto errmsg;  	}   	/* % 	 * There are three cases we support: - 	 *    1. Named anonymous memory (shmget(2)). E 	 *    2. Unnamed anonymous memory (mmap(2): MAP_ANON/MAP_ANONYMOUS). 4 	 *    3. Memory backed by a regular file (mmap(2)). 	 * G 	 * We instantiate a backing file in all cases, which contains at least F 	 * the RLAYOUT structure, and in case #3, contains the actual region.. 	 * This is necessary for a couple of reasons: 	 * E 	 * First, the mpool region uses temporary files to name regions, and F 	 * since you may have multiple regions in the same directory, we need8 	 * a filesystem name to ensure that they don't collide. 	 * E 	 * Second, applications are allowed to forcibly remove regions, even E 	 * if they don't know anything about them other than the name.  If a F 	 * region is backed by anonymous memory, there has to be some way forE 	 * the application to find out that information, and, in some cases, 6 	 * determine ID information for the anonymous memory. 	 */& 	if (F_ISSET(infop, REGION_CREATED)) { 		/*= 		 * If we're using anonymous memory to back this region, set  		 * the flag. 		 */   		if (DB_GLOBAL(db_region_anon))" 			F_SET(infop, REGION_ANONYMOUS);   		/*? 		 * If we're using a regular file to back a region we created, # 		 * grow it to the specified size.  		 */ # 		if (!DB_GLOBAL(db_region_anon) && 7 		    (ret = __db_growregion(infop, infop->size)) != 0)  			goto err;	 	} else {  		/*> 		 * If we're joining a region, figure out what it looks like. 		 * 		 * XXX@ 		 * We have to figure out if the file is a regular file backing> 		 * a region that we want to map into our address space, or aA 		 * file with the information we need to find a shared anonymous 7 		 * region that we want to map into our address space.  		 *A 		 * All this noise is because some systems don't have a coherent A 		 * VM and buffer cache, and worse, if you mix operations on the < 		 * VM and buffer cache, half the time you hang the system. 		 *@ 		 * There are two possibilities.  If the file is the size of an? 		 * RLAYOUT structure, then we know that the real region is in > 		 * shared memory, because otherwise it would be bigger.  (As> 		 * the RLAYOUT structure size is smaller than a disk sector,@ 		 * the only way it can be this size is if deliberately written@ 		 * that way.)  In which case, retrieve the information we need@ 		 * from the RLAYOUT structure and use it to acquire the shared 		 * memory. 		 *? 		 * If the structure is larger than an RLAYOUT structure, then = 		 * the file is backing the shared memory region, and we use A 		 * the current size of the file without reading any information : 		 * from the file itself so that we don't confuse the VM. 		 *@ 		 * And yes, this makes me want to take somebody and kill them,- 		 * but I can't think of any other solution.  		 */ % 		if ((ret = __os_ioinfo(infop->name, . 		    infop->fd, &mbytes, &bytes, NULL)) != 0) 			goto errmsg; # 		size = mbytes * MEGABYTE + bytes;     		if (size <= sizeof(RLAYOUT)) { 			/* 5 			 * If the size is too small, the read fails or the 6 			 * valid flag is incorrect, assume it's because the6 			 * RLAYOUT information hasn't been written out yet, 			 * and retry. 			 */ 			if (size < sizeof(RLAYOUT)) 				goto retry; 
 			if ((ret = 8 			    __os_read(infop->fd, &rl, sizeof(rl), &nr)) != 0) 				goto retry; " 			if (rl.valid != DB_REGIONMAGIC) 				goto retry;n  6 			/* Copy the size, memory id and characteristics. */ 			size = rl.size; 			infop->segid = rl.segid; & 			if (F_ISSET(&rl, REGION_ANONYMOUS))# 				F_SET(infop, REGION_ANONYMOUS);r 		}.   		/*@ 		 * If the region is larger than we think, that's okay, use the> 		 * current size.  If it's smaller than we think, and we were> 		 * just using the default size, that's okay, use the current= 		 * size.  If it's smaller than we think and we really care,P@ 		 * save the size and we'll catch that further down -- we can't? 		 * correct it here because we have to have a lock to grow thed 		 * region. 		 */O< 		if (infop->size > size && !F_ISSET(infop, REGION_SIZEDEF)) 			grow_region = infop->size;r 		infop->size = size;e 	}   	/* E 	 * Map the region into our address space.  If we're creating it, the 4 	 * underlying routines will make it the right size. 	 *1D 	 * There are at least two cases where we can "reasonably" fail whenE 	 * we attempt to map in the region.  On Windows/95, closing the last/D 	 * reference to a region causes it to be zeroed out.  On UNIX, whenC 	 * using the shmget(2) interfaces, the region will no longer exist	H 	 * if the system was rebooted.  In these cases, the underlying map callF 	 * returns EAGAIN, and we *remove* our file and try again.  There areE 	 * obvious races in doing this, but it should eventually settle downp8 	 * to a winner and then things should proceed normally. 	 */5 	if ((ret = __db_mapregion(infop->name, infop)) != 0)R 		if (ret == EAGAIN) { 			/*_8 			 * Pretend we created the region even if we didn't so+ 			 * that our error processing unlinks it.p 			 */  			F_SET(infop, REGION_CREATED); 			ret = 0;( 			goto retry; 		} else 			goto err;   region_init: 	/*o- 	 * Initialize the common region information.  	 *  	 * !!! G 	 * We have to order the region creates so that two processes don't trymF 	 * to simultaneously create the region.  This is handled by using theI 	 * DB_CREATE and DB_EXCL flags when we create the "backing" region file.l 	 *.H 	 * We also have to order region joins so that processes joining regionsE 	 * never see inconsistent data.  We'd like to play permissions gamesiE 	 * with the backing file, but we can't because WNT filesystems won't  	 * open a file mode 0.y 	 */ 	rlp = (RLAYOUT *)infop->addr;& 	if (F_ISSET(infop, REGION_CREATED)) { 		/*> 		 * The process creating the region acquires a lock before it@ 		 * sets the valid flag.  Any processes joining the region will4 		 * check the valid flag before acquiring the lock. 		 *A 		 * Check the return of __db_mutex_init() and __db_mutex_lock(),i? 		 * even though we don't usually check elsewhere.  This is the'A 		 * first lock we initialize and acquire, and we have to know ifr> 		 * it fails.  (It CAN fail, e.g., SunOS, when using fcntl(2)? 		 * for locking, with an in-memory filesystem specified as thec 		 * database home.) 		 */n( 		if ((ret = __db_mutex_init(&rlp->lock,2 		    MUTEX_LOCK_OFFSET(rlp, &rlp->lock))) != 0 ||: 		    (ret = __db_mutex_lock(&rlp->lock, infop->fd)) != 0) 			goto err;  4 		/* Initialize the remaining region information. */ 		rlp->refcnt = 1; 		rlp->size = infop->size;6 		db_version(&rlp->majver, &rlp->minver, &rlp->patch); 		rlp->panic = 0;o 		rlp->segid = infop->segid; 		rlp->flags = 0;h' 		if (F_ISSET(infop, REGION_ANONYMOUS))y  			F_SET(rlp, REGION_ANONYMOUS);   		/*? 		 * Fill in the valid field last -- use a magic number, memoryi? 		 * may not be zero-filled, and we want to minimize the chancet 		 * for collision.m 		 */s 		rlp->valid = DB_REGIONMAGIC;   		/*> 		 * If the region is anonymous, write the RLAYOUT information@ 		 * into the backing file so that future region join and unlink 		 * calls can find it.  		 * 		 * XXXA 		 * We MUST do the seek before we do the write.  On Win95, while)= 		 * closing the last reference to an anonymous shared region > 		 * doesn't discard the region, it does zero it out.  So, the= 		 * REGION_CREATED may be set, but the file may have alreadyt> 		 * been written and the file descriptor may be at the end of 		 * the file. 		 */R) 		if (F_ISSET(infop, REGION_ANONYMOUS)) {r8 			if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, 0)) != 0)
 				goto err; 
 			if ((ret =f; 			    __os_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0)_
 				goto err;r 		}_	 	} else {f@ 		/* Check to see if the region has had catastrophic failure. */ 		if (rlp->panic) {N 			ret = DB_RUNRECOVERY; 			goto err; 		}	   		/*> 		 * Check the valid flag to ensure the region is initialized.? 		 * If the valid flag has not been set, the mutex may not haveF= 		 * been initialized, and an attempt to get it could lead to  		 * random behavior.n 		 */d# 		if (rlp->valid != DB_REGIONMAGIC)g 			goto retry;   		/* Get the region lock. *// 		(void)__db_mutex_lock(&rlp->lock, infop->fd);2   		/*> 		 * We now own the region.  There are a couple of things that" 		 * may have gone wrong, however. 		 *? 		 * Problem #1: while we were waiting for the lock, the regionc@ 		 * was deleted.  Detected by re-checking the valid flag, since0 		 * it's cleared by the delete region routines. 		 */r% 		if (rlp->valid != DB_REGIONMAGIC) {i2 			(void)__db_mutex_unlock(&rlp->lock, infop->fd); 			goto retry; 		}c   		/*A 		 * Problem #2: We want a bigger region than has previously beenyA 		 * created.  Detected by checking if the region is smaller thanl@ 		 * our caller requested.  If it is, we grow the region, (which+ 		 * does the detach and re-attach for us).h 		 */  		if (grow_region != 0 &&d4 		    (ret = __db_rgrow(infop, grow_region)) != 0) {2 			(void)__db_mutex_unlock(&rlp->lock, infop->fd); 			goto err; 		}    		/*= 		 * Problem #3: when we checked the size of the file, it wasT= 		 * still growing as part of creation.  Detected by the fact 8 		 * that infop->size isn't the same size as the region. 		 */B! 		if (infop->size != rlp->size) {i2 			(void)__db_mutex_unlock(&rlp->lock, infop->fd); 			goto retry; 		}a  & 		/* Increment the reference count. */ 		++rlp->refcnt; 	}  / 	/* Return the region in a locked condition. */   	 	if (0) {rF errmsg:		__db_err(infop->dbenv, "%s: %s", infop->name, strerror(ret));   err:! retry:		/* Discard the region. */a 		if (infop->addr != NULL) {! 			(void)__db_unmapregion(infop);h 			infop->addr = NULL; 		}i  ! 		/* Discard the backing file. */  		if (infop->fd != -1) { 			(void)__os_close(infop->fd);n 			infop->fd = -1;  & 			if (F_ISSET(infop, REGION_CREATED))# 				(void)__os_unlink(infop->name);* 		}*   		/* Discard the name. */m 		if (infop->name != NULL) { 			__os_freestr(infop->name);n 			infop->name = NULL; 		}s   		/*8 		 * If we had a temporary error, wait a few seconds and 		 * try again.T 		 */e 		if (ret == 0) {  			if (++retry_cnt <= 3) {! 				__os_sleep(retry_cnt * 2, 0);  				goto loop; 			} 			ret = EAGAIN; 		}  	}   	/*a 	 * XXXwE 	 * HP-UX won't permit mutexes to live in anything but shared memory.rF 	 * Instantiate a shared region file on that architecture, regardless. 	 *y 	 * XXX D 	 * There's a problem in cleaning this up on application exit, or onD 	 * application failure.  If an application opens a database withoutF 	 * an environment, we create a temporary backing mpool region for it.C 	 * That region is marked REGION_PRIVATE, but as HP-UX won't permit C 	 * mutexes to live in anything but shared memory, we instantiate a	D 	 * real file plus a memory region of some form.  If the applicationE 	 * crashes, the necessary information to delete the backing file andcC 	 * any system region (e.g., the shmget(2) segment ID) is no longeri@ 	 * available.  We can't completely fix the problem, but we try. 	 *rE 	 * The underlying UNIX __db_mapregion() code preferentially uses theAG 	 * mmap(2) interface with the MAP_ANON/MAP_ANONYMOUS flags for regions G 	 * that are marked REGION_PRIVATE.  This means that we normally aren'toG 	 * holding any system resources when we get here, in which case we canzD 	 * delete the backing file.  This results in a short race, from the# 	 * __db_open() call above to here.! 	 * D 	 * If, for some reason, we are holding system resources when we getF 	 * here, we don't have any choice -- we can't delete the backing fileA 	 * because we may need it to detach from the resources.  Set theOD 	 * REGION_LASTDETACH flag, so that we do all necessary cleanup when& 	 * the application closes the region. 	 */F 	if (F_ISSET(infop, REGION_PRIVATE) && !F_ISSET(infop, REGION_MALLOC))( 		if (F_ISSET(infop, REGION_HOLDINGSYS))# 			F_SET(infop, REGION_LASTDETACH);  		else {  			F_SET(infop, REGION_REMOVED);  			F_CLR(infop, REGION_CANGROW);   			(void)__os_close(infop->fd);n" 			(void)__os_unlink(infop->name); 		}t   	return (ret); }    /*  * __db_rdetach --)  *	De-attach from a shared memory region.p  *-  * PUBLIC: int __db_rdetach __P((REGINFO *));   */o intz __db_rdetach(infop)  	REGINFO *infop; {i 	RLAYOUT *rlp; 	int detach, ret, t_ret;  	 	ret = 0;    	/*iD 	 * If the region was removed when it was created, no further action 	 * is required. 	 */$ 	if (F_ISSET(infop, REGION_REMOVED)) 		goto done; 	/*oD 	 * If the region was created in memory returned by malloc, the only* 	 * action required is freeing the memory. 	 */% 	if (F_ISSET(infop, REGION_MALLOC)) {  		__os_free(infop->addr, 0); 		goto done; 	}  @ 	/* Otherwise, attach to the region and optionally delete it. */ 	rlp = infop->addr;a   	/* Get the lock. */. 	(void)__db_mutex_lock(&rlp->lock, infop->fd);  % 	/* Decrement the reference count. */  	if (rlp->refcnt == 0) 		__db_err(infop->dbenv,7 		    "region rdetach: reference count went to zero!");f 	else= 		--rlp->refcnt;   	/* @ 	 * If we're going to remove the region, clear the valid flag so@ 	 * that any region join that's blocked waiting for us will know 	 * what happened. 	 */ 	detach = 0;' 	if (F_ISSET(infop, REGION_LASTDETACH))o 		if (rlp->refcnt == 0) {  			detach = 1; 			rlp->valid = 0; 		} else 			ret = EBUSY;r   	/* Release the lock. */0 	(void)__db_mutex_unlock(&rlp->lock, infop->fd);  ) 	/* Close the backing file descriptor. */  	(void)__os_close(infop->fd);n 	infop->fd = -1;  ) 	/* Discard our mapping of the region. */t8 	if ((t_ret = __db_unmapregion(infop)) != 0 && ret == 0) 		ret = t_ret;  ! 	/* Discard the region itself. */m 	if (detach) { 		if ((t_ret => 		    __db_unlinkregion(infop->name, infop) != 0) && ret == 0) 			ret = t_ret; : 		if ((t_ret = __os_unlink(infop->name) != 0) && ret == 0) 			ret = t_ret;  	}   done:	/* Discard the name. */e 	if (infop->name != NULL) {t 		__os_freestr(infop->name); 		infop->name = NULL;e 	}   	return (ret); }t   /*  * __db_runlink --  *	Remove a region.   *2  * PUBLIC: int __db_runlink __P((REGINFO *, int));  */k int  __db_runlink(infop, force) 	REGINFO *infop; 	int force;  {' 	RLAYOUT rl, *rlp;
 	size_t size;  	ssize_t nr; 	u_int32_t mbytes, bytes;  	int fd, ret, t_ret; 	char *name;   	/*n 	 * XXX)A 	 * We assume that we've created a new REGINFO structure for this A 	 * call, not used one that was already initialized.  Regardless,cC 	 * if anyone is planning to use it after we're done, they're going_ 	 * to be sorely disappointed. 	 *)E 	 * If force isn't set, we attach to the region, set a flag to deletenC 	 * the region on last close, and let the region delete code do thel	 	 * work.& 	 */ 	if (!force) {' 		if ((ret = __db_rattach(infop)) != 0)i 			return (ret);   		rlp = (RLAYOUT *)infop->addr;,1 		(void)__db_mutex_unlock(&rlp->lock, infop->fd);S  " 		F_SET(infop, REGION_LASTDETACH);   		return (__db_rdetach(infop));r 	}   	/*yG 	 * Otherwise, we don't want to attach to the region.  We may have beeniG 	 * called to clean up if a process died leaving a region locked and/ori4 	 * corrupted, which could cause the attach to hang. 	 */6 	if ((ret = __db_appname(infop->dbenv, infop->appname,B 	    infop->path, infop->file, infop->dbflags, NULL, &name)) != 0) 		return (ret);    	/*5D 	 * An underlying file is created for all regions other than privateD 	 * (REGION_PRIVATE) ones, regardless of whether or not it's used to= 	 * back the region.  If that file doesn't exist, we're done.	 	 */$ 	if (__os_exists(name, NULL) != 0) { 		__os_freestr(name);t
 		return (0);R 	}   	/*SG 	 * See the comments in __db_rattach -- figure out if this is a regular,D 	 * file backing a region or if it's a regular file with information 	 * about a region.* 	 */@ 	if ((ret = __db_open(name, DB_RDONLY, DB_RDONLY, 0, &fd)) != 0) 		goto errmsg;? 	if ((ret = __os_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0)_ 		goto errmsg;" 	size = mbytes * MEGABYTE + bytes;   	if (size <= sizeof(RLAYOUT)) {n7 		if ((ret = __os_read(fd, &rl, sizeof(rl), &nr)) != 0), 			goto errmsg;t# 		if (rl.valid != DB_REGIONMAGIC) {  			__db_err(infop->dbenv,d0 			    "%s: illegal region magic number", name); 			ret = EINVAL; 			goto err; 		}r  4 		/* Set the size, memory id and characteristics. */ 		infop->size = rl.size; 		infop->segid = rl.segid;% 		if (F_ISSET(&rl, REGION_ANONYMOUS))g" 			F_SET(infop, REGION_ANONYMOUS);	 	} else {* 		infop->size = size;e 		infop->segid = INVALID_SEGID;e 	}  $ 	/* Remove the underlying region. */& 	ret = __db_unlinkregion(name, infop);   	/*eC 	 * Unlink the backing file.  Close the open file descriptor first,OC 	 * because some architectures (e.g., Win32) won't unlink a file ift! 	 * open file descriptors remain.m 	 */ 	(void)__os_close(fd);2 	if ((t_ret = __os_unlink(name)) != 0 && ret == 0) 		ret = t_ret;  	 	if (0) {a? errmsg:		__db_err(infop->dbenv, "%s: %s", name, strerror(ret));i err:		(void)__os_close(fd);a 	}   	__os_freestr(name); 	return (ret); }r   /*  * __db_rgrow --  *	Extend a region.,  *3  * PUBLIC: int __db_rgrow __P((REGINFO *, size_t));l  */i int> __db_rgrow(infop, new_size)	 	REGINFO *infop; 	size_t new_size;t {s 	RLAYOUT *rlp; 	size_t increment;	 	int ret;    	/*  	 * !!! ? 	 * This routine MUST be called with the region already locked.  	 */  D 	/* The underlying routines have flagged if this region can grow. */% 	if (!F_ISSET(infop, REGION_CANGROW))t 		return (EINVAL);   	/*e? 	 * Round off the requested size to the next page boundary, andt, 	 * determine the additional space required. 	 */ 	rlp = (RLAYOUT *)infop->addr;& 	DB_ROUNDOFF(new_size, DB_VMPAGESIZE);" 	increment = new_size - rlp->size;  4 	if ((ret = __db_growregion(infop, increment)) != 0) 		return (ret);   & 	/* Update the on-disk region size. */ 	rlp->size = new_size;  . 	/* Detach from and reattach to the region. */* 	return (__db_rreattach(infop, new_size)); }T   /*  * __db_growregion --	  *	Grow a shared memory region.   */ 
 static int! __db_growregion(infop, increment)  	REGINFO *infop; 	size_t increment; {n 	db_pgno_t pages;N
 	size_t i; 	ssize_t nr, nw; 	u_int32_t relative;	 	int ret;w 	char buf[DB_VMPAGESIZE];n  % 	/* Seek to the end of the region. */r= 	if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0)l 		goto err;	  # 	/* Write nuls to the new bytes. */X 	memset(buf, 0, sizeof(buf));x   	/*i? 	 * Some systems require that all of the bytes of the region beo> 	 * written before it can be mapped and accessed randomly, and+ 	 * other systems don't zero out the pages.a 	 */ 	if (__db_mapinit())3 		/* Extend the region by writing each new page. */t2 		for (i = 0; i < increment; i += DB_VMPAGESIZE) {
 			if ((ret =n: 			    __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
 				goto err;i 			if (nw != sizeof(buf))a
 				goto eio;r 		}m 	else {n 		/*? 		 * Extend the region by writing the last page.  If the regiona? 		 * is >4Gb, increment may be larger than the maximum possibleb@ 		 * seek "relative" argument, as it's an unsigned 32-bit value.= 		 * Break the offset into pages of 1MB each so that we don'th= 		 * overflow (2^20 + 2^32 is bigger than any memory I expecte 		 * to see for awhile). 		 */h1 		pages = (increment - DB_VMPAGESIZE) / MEGABYTE;N4 		relative = (increment - DB_VMPAGESIZE) % MEGABYTE;! 		if ((ret = __os_seek(infop->fd,y4 		    MEGABYTE, pages, relative, 0, SEEK_CUR)) != 0) 			goto err;@ 		if ((ret = __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0) 			goto err; 		if (nw != sizeof(buf)) 			goto eio;   		/*@ 		 * It's sometimes significantly faster to page-fault in all of@ 		 * the region's pages before we run the application, as we see@ 		 * nasty side-effects when we page-fault while holding various? 		 * locks, i.e., the lock takes a long time to acquire becauses? 		 * of the underlying page fault, and the other threads convoyi 		 * behind the lock holder. 		 *> 		 * We also use REGION_INIT to guarantee that there is enough? 		 * disk space for the region, so we also write a byte to eachS< 		 * page.  Reading the byte is insufficient as some systems= 		 * (e.g., Solaris) do not instantiate disk pages to satisfy>? 		 * a read, and so we don't know if there is enough disk spaceD 		 * or not. 		 */d" 		if (DB_GLOBAL(db_region_init)) {  			pages = increment / MEGABYTE;# 			relative = increment % MEGABYTE;G" 			if ((ret = __os_seek(infop->fd,5 			    MEGABYTE, pages, relative, 1, SEEK_END)) != 0)e
 				goto err;o  # 			/* Write a byte to each page. */i3 			for (i = 0; i < increment; i += DB_VMPAGESIZE) {N 				if ((ret =1 				    __os_write(infop->fd, buf, 1, &nr)) != 0)m 					goto err; 				if (nr != 1) 					goto eio;# 				if ((ret = __os_seek(infop->fd, 4 				    0, 0, DB_VMPAGESIZE - 1, 0, SEEK_CUR)) != 0) 					goto err; 			} 		}} 	} 	return (0);   eio:	ret = EIO;o> err:	__db_err(infop->dbenv, "region grow: %s", strerror(ret)); 	return (ret); }d   /*  * __db_rreattach --(  *	Detach from and reattach to a region.  *7  * PUBLIC: int __db_rreattach __P((REGINFO *, size_t));,  */  intg __db_rreattach(infop, new_size)  	REGINFO *infop; 	size_t new_size;; { 	 	int ret;f   #ifdef DIAGNOSTICe 	if (infop->name == NULL) {f: 		__db_err(infop->dbenv, "__db_rreattach: name was NULL"); 		return (EINVAL); 	} #endif 	/*/B 	 * If we're growing an already mapped region, we have to unmap itC 	 * and get it back.  We have it locked, so nobody else can get in,tC 	 * which makes it fairly straight-forward to do, as everybody else-B 	 * is going to block while we do the unmap/remap.  NB: if we failB 	 * to get it back, the pooch is genuinely screwed, because we can) 	 * never release the lock we're holding.b 	 *pF 	 * Detach from the region.  We have to do this first so architecturesF 	 * that don't permit a file to be mapped into different places in the? 	 * address space simultaneously, e.g., HP's PaRisc, will work.t 	 */* 	if ((ret = __db_unmapregion(infop)) != 0) 		return (ret);   < 	/* Update the caller's REGINFO size to the new map size. */ 	infop->size = new_size;   	/* Attach to the region. */* 	ret = __db_mapregion(infop->name, infop);   	return (ret); } gion.   *2  * PUBLIC: int __db_runlink __P((REGINFO *, int));  */k int  __db_runlink(infop, force) 	REGINFO *infop; 	int force;  {' 	RLAYOUT rl, *rlp;
 	size_t size;  	ssize_t nr; 	u_int32_t mbytes, bytes;  	int fd, ret, t_ret; 	char *name;   	/*n 	 * XXX)A 	 * We assume that we've created a new REGINFO structure for this A 	 * call, not used one that was already initialized.  Regardless,cC 	 * if anyone is planning to use it after we're done, they'r                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                