 /*- 7  * See the file LICENSE for redistribution information.   *!  * Copyright (c) 1996, 1997, 1998 ,  *	Sleepycat Software.  All rights reserved.  */  #include "config.h"    #ifndef lintI static const char sccsid[] = "@(#)mp_fopen.c	10.59 (Sleepycat) 12/11/98";  #endif /* not lint */    #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h>   #include <errno.h> #include <string.h>  #endif   #include "db_int.h"  #include "shqueue.h" #include "db_shash.h"  #include "mp.h"  #include "common_ext.h"   = static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); * static int __memp_mf_open __P((DB_MPOOL *,F     const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));   /*  * memp_fopen --+  *	Open a backing file for the memory pool.   */  int ; memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)  	DB_MPOOL *dbmp; 	const char *path; 	u_int32_t flags; 
 	int mode; 	size_t pagesize;  	DB_MPOOL_FINFO *finfop; 	DB_MPOOLFILE **retp;  { 	 	int ret;    	MP_PANIC_CHECK(dbmp);   	/* Validate arguments. */" 	if ((ret = __db_fchk(dbmp->dbenv,C 	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)  		return (ret);   # 	/* Require a non-zero pagesize. */  	if (pagesize == 0) { > 		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified"); 		return (EINVAL); 	}4 	if (finfop != NULL && finfop->clear_len > pagesize) 		return (EINVAL);   	return (__memp_fopen(dbmp, : 	    NULL, path, flags, mode, pagesize, 1, finfop, retp)); }    /*  * __memp_fopen --=  *	Open a backing file for the memory pool; internal version.   *G  * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, O  * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));   */  int L __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) 	DB_MPOOL *dbmp; 	MPOOLFILE *mfp; 	const char *path; 	u_int32_t flags;  	int mode, needlock; 	size_t pagesize;  	DB_MPOOL_FINFO *finfop; 	DB_MPOOLFILE **retp;  {  	DB_ENV *dbenv;  	DB_MPOOLFILE *dbmfp;  	DB_MPOOL_FINFO finfo; 	db_pgno_t last_pgno;  	size_t maxmap;  	u_int32_t mbytes, bytes; 	 	int ret;   	u_int8_t idbuf[DB_FILE_ID_LEN];
 	char *rpath;    	dbenv = dbmp->dbenv; 	 	ret = 0;  	rpath = NULL;   	/* C 	 * If mfp is provided, we take the DB_MPOOL_FINFO information from C 	 * the mfp.  We don't bother initializing everything, because some D 	 * of them are expensive to acquire.  If no mfp is provided and the3 	 * finfop argument is NULL, we default the values.  	 */ 	if (finfop == NULL) {# 		memset(&finfo, 0, sizeof(finfo));  		if (mfp != NULL) { 			finfo.ftype = mfp->ftype; 			finfo.pgcookie = NULL;  			finfo.fileid = NULL; # 			finfo.lsn_offset = mfp->lsn_off; $ 			finfo.clear_len = mfp->clear_len;
 		} else { 			finfo.ftype = 0;  			finfo.pgcookie = NULL;  			finfo.fileid = NULL;  			finfo.lsn_offset = -1;  			finfo.clear_len = 0;  		}  		finfop = &finfo; 	}  9 	/* Allocate and initialize the per-process structure. */ ? 	if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)  		return (ret);  	dbmfp->dbmp = dbmp; 	dbmfp->fd = -1; 	dbmfp->ref = 1; 	if (LF_ISSET(DB_RDONLY))  		F_SET(dbmfp, MP_READONLY);   	if (path == NULL) { 		if (LF_ISSET(DB_RDONLY)) { 			__db_err(dbenv,8 			    "memp_fopen: temporary files can't be readonly"); 			ret = EINVAL; 			goto err; 		}  		last_pgno = 0;	 	} else { 4 		/* Get the real name for this file and open it. */  		if ((ret = __db_appname(dbenv,6 		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) 			goto err; 		if ((ret = __db_open(rpath, % 		   LF_ISSET(DB_CREATE | DB_RDONLY), 6 		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {3 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));  			goto err; 		}    		/*? 		 * Don't permit files that aren't a multiple of the pagesize, > 		 * and find the number of the last page in the file, all the0 		 * time being careful not to overflow 32 bits. 		 * 		 * !!!> 		 * We can't use off_t's here, or in any code in the mainline= 		 * library for that matter.  (We have to use them in the os ? 		 * stubs, of course, as there are system calls that take them ? 		 * as arguments.)  The reason is that some customers build in @ 		 * environments where an off_t is 32-bits, but still run where9 		 * offsets are 64-bits, and they pay us a lot of money.  		 */  		if ((ret = __os_ioinfo(rpath, 0 		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {3 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));  			goto err; 		}   < 		/* Page sizes have to be a power-of-two, ignore mbytes. */ 		if (bytes % pagesize != 0) { 			__db_err(dbenv,6 			    "%s: file size not a multiple of the pagesize", 			    rpath); 			ret = EINVAL; 			goto err; 		}   - 		last_pgno = mbytes * (MEGABYTE / pagesize);   		last_pgno += bytes / pagesize;   		/*B 		 * Get the file id if we weren't given one.  Generated file id's@ 		 * don't use timestamps, otherwise there'd be no chance of any% 		 * other process joining the party.  		 */  		if (finfop->fileid == NULL) { 8 			if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
 				goto err;  			finfop->fileid = idbuf; 		}  	}   	/* D 	 * If we weren't provided an underlying shared object to join with,C 	 * find/allocate the shared file objects.  Also allocate space for $ 	 * for the per-process thread lock. 	 */ 	if (needlock) 		LOCKREGION(dbmp);    	if (mfp == NULL)  		ret = __memp_mf_open(dbmp,/ 		    path, pagesize, last_pgno, finfop, &mfp);  	else { 
 		++mfp->ref; 
 		ret = 0; 	} 	if (ret == 0 &&+ 	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret = H 	    __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)  		LOCKINIT(dbmp, dbmfp->mutexp);   	if (needlock) 		UNLOCKREGION(dbmp);  	if (ret != 0) 		goto err;    	dbmfp->mfp = mfp;   	/*  	 * If a file: 	 *	+ is read-only 	 *	+ isn't temporary , 	 *	+ doesn't require any pgin/pgout support# 	 *	+ the DB_NOMMAP flag wasn't set 0 	 *	+ and is less than mp_mmapsize bytes in size 	 * F 	 * we can mmap it instead of reading/writing buffers.  Don't do errorF 	 * checking based on the mmap call failure.  We want to do normal I/OF 	 * on the file if the reason we failed was because the file was on anG 	 * NFS mounted partition, and we can fail in buffer I/O just as easily  	 * as here. 	 *  	 * XXX F 	 * We'd like to test to see if the file is too big to mmap.  Since weH 	 * don't know what size or type off_t's or size_t's are, or the largestB 	 * unsigned integral type is, or what random insanity the local CG 	 * compiler will perpetrate, doing the comparison in a portable way is F 	 * flatly impossible.  Hope that mmap fails if the file is too large. 	 */6 #define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */! 	if (F_ISSET(mfp, MP_CAN_MMAP)) { # 		if (!F_ISSET(dbmfp, MP_READONLY))  			F_CLR(mfp, MP_CAN_MMAP);  		if (path == NULL)  			F_CLR(mfp, MP_CAN_MMAP);  		if (finfop->ftype != 0)  			F_CLR(mfp, MP_CAN_MMAP);  		if (LF_ISSET(DB_NOMMAP)) 			F_CLR(mfp, MP_CAN_MMAP); 5 		maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ? * 		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize;# 		if (mbytes > maxmap / MEGABYTE || B 		    (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE)) 			F_CLR(mfp, MP_CAN_MMAP);  	} 	dbmfp->addr = NULL;! 	if (F_ISSET(mfp, MP_CAN_MMAP)) { 1 		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;  		if (__db_mapfile(rpath, 5 		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {  			dbmfp->addr = NULL; 			F_CLR(mfp, MP_CAN_MMAP);  		}  	} 	if (rpath != NULL)  		__os_freestr(rpath);    	LOCKHANDLE(dbmp, dbmp->mutexp);+ 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); " 	UNLOCKHANDLE(dbmp, dbmp->mutexp);   	*retp = dbmfp;  	return (0);   err:	/* A 	 * Note that we do not have to free the thread mutex, because we > 	 * never get to here after we have successfully allocated it. 	 */ 	if (rpath != NULL)  		__os_freestr(rpath); 	if (dbmfp->fd != -1)  		(void)__os_close(dbmfp->fd); 	if (dbmfp != NULL) ) 		__os_free(dbmfp, sizeof(DB_MPOOLFILE));  	return (ret); }    /*  * __memp_mf_open --  *	Open an MPOOLFILE.   */ 
 static int= __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)e 	DB_MPOOL *dbmp; 	const char *path; 	size_t pagesize;p 	db_pgno_t last_pgno;1 	DB_MPOOL_FINFO *finfop; 	MPOOLFILE **retp; {. 	MPOOLFILE *mfp;	 	int ret; 	 	void *p;i  " #define	ISTEMPORARY	(path == NULL)   	/*n> 	 * Walk the list of MPOOLFILE's, looking for a matching file./ 	 * Temporary files can't match previous files.  	 */ 	if (!ISTEMPORARY): 		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);> 		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { 			if (F_ISSET(mfp, MP_TEMP))m
 				continue;( 			if (!memcmp(finfop->fileid,8 			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {. 				if (finfop->clear_len != mfp->clear_len ||& 				    finfop->ftype != mfp->ftype ||, 				    pagesize != mfp->stat.st_pagesize) { 					__db_err(dbmp->dbenv,5 			    "%s: ftype, clear length or pagesize changed",; 					    path);t 					return (EINVAL);  				}d  2 				/* Found it: increment the reference count. */ 				++mfp->ref;  				*retp = mfp; 				return (0);( 			} 		}/    	/* Allocate a new MPOOLFILE. */D 	if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) 		return (ret); 
 	*retp = mfp;n    	/* Initialize the structure. */# 	memset(mfp, 0, sizeof(MPOOLFILE));  	mfp->ref = 1; 	mfp->ftype = finfop->ftype;# 	mfp->lsn_off = finfop->lsn_offset; $ 	mfp->clear_len = finfop->clear_len;   	/*>G 	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,,F 	 * we have to know the last page in the file.  Figure it out and save 	 * it away. 	 */" 	mfp->stat.st_pagesize = pagesize;2 	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;   	if (ISTEMPORARY)E 		F_SET(mfp, MP_TEMP); 	else { . 		/* Copy the file path into shared memory. */ 		if ((ret = __memp_alloc(dbmp,_2 		    strlen(path) + 1, &mfp->path_off, &p)) != 0) 			goto err;$ 		memcpy(p, path, strlen(path) + 1);  ? 		/* Copy the file identification string into shared memory. */; 		if ((ret = __memp_alloc(dbmp,I2 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) 			goto err;, 		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);   		F_SET(mfp, MP_CAN_MMAP); 	}  / 	/* Copy the page cookie into shared memory. */_? 	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {  		mfp->pgcookie_len = 0; 		mfp->pgcookie_off = 0;	 	} else {e 		if ((ret = __memp_alloc(dbmp,i< 		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) 			goto err;< 		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);- 		mfp->pgcookie_len = finfop->pgcookie->size;a 	}  8 	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */< 	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);  	 	if (0) {	 err:		if (mfp->path_off != 0)n  			__db_shalloc_free(dbmp->addr,$ 			    R_ADDR(dbmp, mfp->path_off)); 		if (mfp->fileid_off != 0)l  			__db_shalloc_free(dbmp->addr,& 			    R_ADDR(dbmp, mfp->fileid_off)); 		if (mfp != NULL)& 			__db_shalloc_free(dbmp->addr, mfp);
 		mfp = NULL;p 	} 	return (0); }    /*  * memp_fclose --h,  *	Close a backing file for the memory pool.  */o ints memp_fclose(dbmfp) 	DB_MPOOLFILE *dbmfp;u {( 	DB_MPOOL *dbmp; 	int ret, t_ret;   	dbmp = dbmfp->dbmp;	 	ret = 0;L   	MP_PANIC_CHECK(dbmp);   	for (;;) {E! 		LOCKHANDLE(dbmp, dbmp->mutexp);	   		/*@ 		 * We have to reference count DB_MPOOLFILE structures as other@ 		 * threads may be using them.  The problem only happens if the= 		 * application makes a bad design choice.  Here's the path:i 		 * 		 * Thread A opens a database.d< 		 * Thread B uses thread A's DB_MPOOLFILE to write a buffer6 		 *    in order to free up memory in the mpool cache.= 		 * Thread A closes the database while thread B is using theD 		 *    DB_MPOOLFILE structure.  		 *> 		 * By opening all databases before creating the threads, and= 		 * closing them after the threads have exited, applicationsh@ 		 * get better performance and avoid the problem path entirely. 		 *A 		 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer!A 		 * is a short-term lock, even in worst case, since we better be @ 		 * the only thread of control using the DB_MPOOLFILE structure? 		 * to read pages *into* the cache.  Wait until we're the only	@ 		 * reference holder and remove the DB_MPOOLFILE structure from/ 		 * the list, so nobody else can even find it.i 		 */w 		if (dbmfp->ref == 1) {( 			TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);	 			break;( 		}=# 		UNLOCKHANDLE(dbmp, dbmp->mutexp);,   		(void)__os_sleep(1, 0);) 	}" 	UNLOCKHANDLE(dbmp, dbmp->mutexp);  0 	/* Complain if pinned blocks never returned. */ 	if (dbmfp->pinref != 0)< 		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",/ 		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);e  & 	/* Close the underlying MPOOLFILE. */$ 	(void)__memp_mf_close(dbmp, dbmfp);  $ 	/* Discard any mmap information. */ 	if (dbmfp->addr != NULL &&g: 	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0) 		__db_err(dbmp->dbenv,	1 		    "%s: %s", __memp_fn(dbmfp), strerror(ret));a  E 	/* Close the file; temporary files may not yet have been created. */>? 	if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {b 		__db_err(dbmp->dbenv, 3 		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));f 		if (ret != 0)d 			t_ret = ret;s 	}   	/* Free memory. */  	if (dbmfp->mutexp != NULL) {l 		LOCKREGION(dbmp);c/ 		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);c 		UNLOCKREGION(dbmp);) 	}  * 	/* Discard the DB_MPOOLFILE structure. */( 	__os_free(dbmfp, sizeof(DB_MPOOLFILE));   	return (ret); }o   /*  * __memp_mf_close ---  *	Close down an MPOOLFILE.r  */ 
 static int __memp_mf_close(dbmp, dbmfp) 	DB_MPOOL *dbmp; 	DB_MPOOLFILE *dbmfp;d {u 	BH *bhp, *nbhp; 	MPOOL *mp;= 	MPOOLFILE *mfp; 	size_t mf_offset;   	mp = dbmp->mp;  	mfp = dbmfp->mfp;   	LOCKREGION(dbmp);  9 	/* If more than a single reference, simply decrement. */	 	if (mfp->ref > 1) {
 		--mfp->ref;  		goto ret1; 	}   	/*nG 	 * Move any BH's held by the file to the free list.  We don't free thelH 	 * memory itself because we may be discarding the memory pool, and it'sH 	 * fairly expensive to reintegrate the buffers back into the region for 	 * no purpose.n 	 */! 	mf_offset = R_OFFSET(dbmp, mfp);eF 	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {% 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);    #ifdef DEBUG_NO_DIRTY < 		/* Complain if we find any blocks that were left dirty. */ 		if (F_ISSET(bhp, BH_DIRTY))o 			__db_err(dbmp->dbenv,1 			    "%s: close: pgno %lu left dirty; ref %lu",n 			    __memp_fn(dbmfp),, 			    (u_long)bhp->pgno, (u_long)bhp->ref); #endif  $ 		if (bhp->mf_offset == mf_offset) {  			if (F_ISSET(bhp, BH_DIRTY)) { 				++mp->stat.st_page_clean;i 				--mp->stat.st_page_dirty;	 			}$ 			__memp_bhfree(dbmp, mfp, bhp, 0);1 			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);S 		}m 	}  * 	/* Delete from the list of MPOOLFILEs. */1 	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);    	/* Free the space. */ 	if (mfp->path_off != 0)= 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));  	if (mfp->fileid_off != 0)? 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));s 	if (mfp->pgcookie_off != 0)A 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); $ 	__db_shalloc_free(dbmp->addr, mfp);   ret1:	UNLOCKREGION(dbmp);; 	return (0); }pMP_CAN_MMAP)) { 1 		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;  		if (__db_mapfile(rpath, 5 		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {  			dbmfp->addr = NULL; 			F_CLR(mfp, MP_CAN_MMAP);  		}  	} 	if (rpath != NULL)  		__os_freestr(rpath);    	LOCKHANDLE(dbmp, dbmp->mutexp);+ 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); " 	UNLOCKHAN                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                