 /*- 7  * See the file LICENSE for redistribution information.   *!  * Copyright (c) 1996, 1997, 1998 ,  *	Sleepycat Software.  All rights reserved.  */  #include "config.h"    #ifndef lintH static const char sccsid[] = "@(#)mp_fget.c	10.53 (Sleepycat) 11/16/98"; #endif /* not lint */    #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h>   #include <errno.h> #include <string.h>  #endif   #include "db_int.h"  #include "shqueue.h" #include "db_shash.h"  #include "mp.h"  #include "common_ext.h"    /*  * memp_fget --   *	Get a page from the file.  */  int ( memp_fget(dbmfp, pgnoaddr, flags, addrp) 	DB_MPOOLFILE *dbmfp;  	db_pgno_t *pgnoaddr;  	u_int32_t flags; 
 	void *addrp;  { 	 	BH *bhp;  	DB_MPOOL *dbmp; 	MPOOL *mp;  	MPOOLFILE *mfp; 	size_t bucket, mf_offset; 	u_int32_t st_hsearch; 	int b_incr, first, ret;   	dbmp = dbmfp->dbmp; 	mp = dbmp->mp;  	mfp = dbmfp->mfp;   	MP_PANIC_CHECK(dbmp);   	/*  	 * Validate arguments.  	 *  	 * !!! F 	 * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonlyF 	 * files here, and create non-existent pages in readonly files if theD 	 * flags are set, later.  The reason is that the hash access methodG 	 * wants to get empty pages that don't really exist in readonly files. G 	 * The only alternative is for hash to write the last "bucket" all the H 	 * time, which we don't want to do because one of our big goals in lifeG 	 * is to keep database files small.  It's sleazy as hell, but we catch : 	 * any attempt to actually write the file in memp_fput(). 	 */@ #define	OKFLAGS	(DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW) 	if (flags != 0) { 		if ((ret =@ 		    __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0) 			return (ret);   		switch (flags) { 		case DB_MPOOL_CREATE:  		case DB_MPOOL_LAST:  		case DB_MPOOL_NEW:	 		case 0: 	 			break; 
 		default:3 			return (__db_ferr(dbmp->dbenv, "memp_fget", 1));  		}  	}   #ifdef DIAGNOSTIC  	/*  	 * XXX E 	 * We want to switch threads as often as possible.  Yield every time + 	 * we get a new page to ensure contention.  	 */ 	if (DB_GLOBAL(db_pageyield))  		__os_yield(1); #endif  , 	/* Initialize remaining local variables. */! 	mf_offset = R_OFFSET(dbmp, mfp);  	bhp = NULL; 	st_hsearch = 0; 	b_incr = ret = 0;  ; 	/* Determine the hash bucket where this page will live. */ + 	bucket = BUCKET(mp, mf_offset, *pgnoaddr);    	LOCKREGION(dbmp);   	/* 1 	 * Check for the last or last + 1 page requests.  	 * D 	 * Examine and update the file's last_pgno value.  We don't care ifD 	 * the last_pgno value immediately changes due to another thread --G 	 * at this instant in time, the value is correct.  We do increment the C 	 * current last_pgno value if the thread is asking for a new page, D 	 * however, to ensure that two threads creating pages don't get the
 	 * same one.  	 */. 	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { 		if (LF_ISSET(DB_MPOOL_NEW))  			++mfp->last_pgno; 		*pgnoaddr = mfp->last_pgno; 1 		bucket = BUCKET(mp, mf_offset, mfp->last_pgno);    		if (LF_ISSET(DB_MPOOL_NEW))  			goto alloc; 	}   	/* F 	 * If mmap'ing the file and the page is not past the end of the file, 	 * just return a pointer. 	 * F 	 * The page may be past the end of the file, so check the page numberG 	 * argument against the original length of the file.  If we previously D 	 * returned pages past the original end of the file, last_pgno willF 	 * have been updated to match the "new" end of the file, and checkingG 	 * against it would return pointers past the end of the mmap'd region.  	 * F 	 * If another process has opened the file for writing since we mmap'dF 	 * it, we will start playing the game by their rules, i.e. everythingH 	 * goes through the cache.  All pages previously returned will be safe,9 	 * as long as the correct locking protocol was observed.  	 *  	 * XXX B 	 * We don't discard the map because we don't know when all of theC 	 * pages will have been discarded from the process' address space. @ 	 * It would be possible to do so by reference counting the openC 	 * pages from the mmap, but it's unclear to me that it's worth it.  	 */6 	if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP))( 		if (*pgnoaddr > mfp->orig_last_pgno) { 			/* 	 			 * !!! 8 			 * See the comment above about non-existent pages and 			 * the hash access method.  			 */$ 			if (!LF_ISSET(DB_MPOOL_CREATE)) { 				__db_err(dbmp->dbenv, % 				    "%s: page %lu doesn't exist", - 				    __memp_fn(dbmfp), (u_long)*pgnoaddr);  				ret = EINVAL; 
 				goto err;  			}
 		} else { 			*(void **)addrp =8 			    R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); 			++mp->stat.st_map;  			++mfp->stat.st_map;
 			goto done;  		}   * 	/* Search the hash chain for the page. */6 	for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);7 	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {  		++st_hsearch; < 		if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset) 			continue;  & 		/* Increment the reference count. */! 		if (bhp->ref == UINT16_T_MAX) {  			__db_err(dbmp->dbenv,0 			    "%s: page %lu: reference count overflow",, 			    __memp_fn(dbmfp), (u_long)bhp->pgno); 			ret = EINVAL; 			goto err; 		}    		/*> 		 * Increment the reference count.  We may discard the region> 		 * lock as we evaluate and/or read the buffer, so we need to= 		 * ensure that it doesn't move and that its contents remain  		 * unchanged.  		 */ 
 		++bhp->ref; 
 		b_incr = 1;    		/** 	 	 * Any buffer we find might be trouble. 		 * 		 * BH_LOCKED -- > 		 * I/O is in progress.  Because we've incremented the buffer< 		 * reference count, we know the buffer can't move.  UnlockA 		 * the region lock, wait for the I/O to complete, and reacquire  		 * the region. 		 */ 7 		for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {  			UNLOCKREGION(dbmp);   			/* : 			 * Explicitly yield the processor if it's not the first9 			 * pass through this loop -- if we don't, we might end 9 			 * up running to the end of our CPU quantum as we will / 			 * simply be swapping between the two locks.  			 */ 			if (!first) 				__os_yield(1);   			LOCKBUFFER(dbmp, bhp); " 			/* Wait for I/O to finish... */ 			UNLOCKBUFFER(dbmp, bhp);  			LOCKREGION(dbmp); 		}    		/* 		 * BH_TRASH --? 		 * The contents of the buffer are garbage.  Shouldn't happen, < 		 * and this read is likely to fail, but might as well try. 		 */  		if (F_ISSET(bhp, BH_TRASH))  			goto reread;    		/* 		 * BH_CALLPGIN -- = 		 * The buffer was converted so it could be written, and the ) 		 * contents need to be converted again.  		 */ " 		if (F_ISSET(bhp, BH_CALLPGIN)) {- 			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) 
 				goto err;  			F_CLR(bhp, BH_CALLPGIN);  		}    		++mp->stat.st_cache_hit; 		++mfp->stat.st_cache_hit;  		*(void **)addrp = bhp->buf;  		goto done; 	}  7 alloc:	/* Allocate new buffer header and data space. */ + 	if ((ret = __memp_alloc(dbmp, sizeof(BH) - A 	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)  		goto err;    #ifdef DIAGNOSTIC 2 	if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) { 		__db_err(dbmp->dbenv, 5 		    "Internal error: BH data NOT size_t aligned.");  		ret = EINVAL;  		goto err;  	} #endif  	/* Initialize the BH fields. */ 	memset(bhp, 0, sizeof(BH)); 	LOCKINIT(dbmp, &bhp->mutex);  	bhp->ref = 1; 	bhp->pgno = *pgnoaddr;  	bhp->mf_offset = mf_offset;   	/* B 	 * Prepend the bucket header to the head of the appropriate MPOOLB 	 * bucket hash list.  Append the bucket header to the tail of the 	 * MPOOL LRU chain. 	 */: 	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);( 	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);   	/* 5 	 * If we created the page, zero it out and continue.  	 *  	 * !!! C 	 * Note: DB_MPOOL_NEW specifically doesn't call the pgin function. D 	 * If DB_MPOOL_CREATE is used, then the application's pgin functionE 	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW, : 	 * it can detect all of its page creates, and not bother. 	 * C 	 * Otherwise, read the page into memory, optionally creating it if  	 * DB_MPOOL_CREATE is set.  	 */ 	if (LF_ISSET(DB_MPOOL_NEW)) { 		if (mfp->clear_len == 0). 			memset(bhp->buf, 0, mfp->stat.st_pagesize); 		else {' 			memset(bhp->buf, 0, mfp->clear_len);s #ifdef DIAGNOSTICu* 			memset(bhp->buf + mfp->clear_len, 0xdb,/ 			    mfp->stat.st_pagesize - mfp->clear_len);  #endif 		}e   		++mp->stat.st_page_create; 		++mfp->stat.st_page_create;y	 	} else {  		/*= 		 * It's possible for the read function to fail, which meanst> 		 * that we fail as well.  Note, the __memp_pgread() function; 		 * discards the region lock, so the buffer must be pinnedt@ 		 * down so that it cannot move and its contents are unchanged. 		 */r( reread:		if ((ret = __memp_pgread(dbmfp,. 		    bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) { 			/*b	 			 * !!!P9 			 * Discard the buffer unless another thread is waiting 9 			 * on our I/O to complete.  Regardless, the header has; 			 * the BH_TRASH flag set. 			 */ 			if (bhp->ref == 1);% 				__memp_bhfree(dbmp, mfp, bhp, 1);  			goto err; 		}    		++mp->stat.st_cache_miss;t 		++mfp->stat.st_cache_miss; 	}   	/*gH 	 * If we're returning a page after our current notion of the last-page,H 	 * update our information.  Note, there's no way to un-instantiate thisG 	 * page, it's going to exist whether it's returned to us dirty or not.y 	 */  	if (bhp->pgno > mfp->last_pgno) 		mfp->last_pgno = bhp->pgno;t   	++mp->stat.st_page_clean; 	*(void **)addrp = bhp->buf;  / done:	/* Update the chain search statistics. */i 	if (st_hsearch) { 		++mp->stat.st_hash_searches;, 		if (st_hsearch > mp->stat.st_hash_longest)) 			mp->stat.st_hash_longest = st_hsearch;R* 		mp->stat.st_hash_examined += st_hsearch; 	}   	++dbmfp->pinref;=   	UNLOCKREGION(dbmp);   	return (0);  ! err:	/* Discard our reference. */  	if (b_incr)
 		--bhp->ref;  	UNLOCKREGION(dbmp);   	*(void **)addrp = NULL; 	return (ret); }:		case 0: 	 			break; 
 		default:3 			return (__db_ferr(dbmp->dbenv, "memp_fget", 1));  		}  	}   #ifdef DIAGNOSTIC  	/*  	 * XXX E 	 * We want to switch threads as often as possible.  Yield every tim                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                