红联Linux门户
Linux帮助

Linux page cache 里的几个函数的源码分析

发布时间:2014-11-30 15:32:12来源:linux网站作者:raintungli

page cache 在 Linux VFS 中是比较重要的一层,其功能这里就不详细介绍了。本文主要介绍几个关键性函数,有助于了解 page cache 的整体逻辑和流程。


先看一下page 的结构体

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page.
 */ 
struct page { 
unsigned long flags;/* Atomic flags, some possibly
                     * updated asynchronously */ 
atomic_t _count;/* Usage count, see below. */ 
atomic_t _mapcount; /* Count of ptes mapped in mms,
                     * to show when page is mapped
                     * & limit reverse map searches.
                     */ 
/*
 * The {private, mapping} pair shares storage with ptl when the
 * NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS condition holds.
 * NOTE(review): ptl is presumably the per-page page-table spinlock;
 * confirm against the kernel version in use.
 */
union { 
struct { 
unsigned long private;  /* Mapping-private opaque data:
                         * usually used for buffer_heads
                         * if PagePrivate set; used for
                         * swp_entry_t if PageSwapCache;
                         * indicates order in the buddy
                         * system if PG_buddy is set.
                         */ 
struct address_space *mapping;  /* If low bit clear, points to
                                 * inode address_space, or NULL.
                                 * If page mapped as anonymous
                                 * memory, low bit is set, and
                                 * it points to anon_vma object:
                                 * see PAGE_MAPPING_ANON below.
                                 */ 
}; 
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS  
spinlock_t ptl; 
#endif  
}; 
pgoff_t index;  /* Our offset within mapping. */ 
struct list_head lru;   /* Pageout list, eg. active_list
                         * protected by zone->lru_lock !
                         */ 
/*
 * On machines where all RAM is mapped into kernel address space,
 * we can simply calculate the virtual address. On machines with
 * highmem some memory is mapped into kernel virtual memory
 * dynamically, so we need a place to store that address.
 * Note that this field could be 16 bits on x86 ... ;)
 *
 * Architectures with slow multiplication can define
 * WANT_PAGE_VIRTUAL in asm/page.h
 */ 
#if defined(WANT_PAGE_VIRTUAL)  
void *virtual;  /* Kernel virtual address (NULL if
                 * not kmapped, ie. highmem) */ 
#endif /* WANT_PAGE_VIRTUAL */  
};


page_cache_get() 主要是调用函数get_page

static inline void get_page(struct page *page) 

if (unlikely(PageCompound(page))) 
page = (struct page *)page_private(page); 
atomic_inc(&page->_count); 
}


主要是将 page 里的计数器 +1,该计数器表示 page 被引用(reference)的次数。

page_cache_release() 的核心函数 put_page_testzero

static inline int put_page_testzero(struct page *page) 

BUG_ON(atomic_read(&page->_count) == 0); 
return atomic_dec_and_test(&page->_count); 
}


显然是page的计数器-1, page的引用被释放。

page 的flags 参数, 在page 的结构体里定义了flags参数,用bit位来标识page的状态,定义在page-flags.h文件里

这是在32位机 和 64位 系统的关于flags 定义

32 bit  -------------------------------| FIELDS |       FLAGS         |
64 bit  |           FIELDS             | ??????         FLAGS         |
        63                            32                              0


从 bit0 到 bit19 是常用的标志位,其他位保留给了 mapping zone、node 和 SPARSEMEM。

/*
 * Bit numbers of the per-page state flags stored in page->flags.
 * Each macro is the bit index (0..19), not a mask.
 */
#define PG_locked	 0	/* Page is locked. Don't touch. */
#define PG_error	 1
#define PG_referenced	 2
#define PG_uptodate	 3

#define PG_dirty	 4
#define PG_lru		 5
#define PG_active	 6
#define PG_slab		 7	/* slab debug (Suparna wants this) */

#define PG_checked	 8	/* kill me in 2.5.<early>. */
#define PG_arch_1	 9
#define PG_reserved	10
#define PG_private	11	/* Has something at ->private */

#define PG_writeback	12	/* Page is under writeback */
#define PG_nosave	13	/* Used for system suspend/resume */
#define PG_compound	14	/* Part of a compound page */
#define PG_swapcache	15	/* Swap page: swp_entry_t in private */

#define PG_mappedtodisk	16	/* Has blocks allocated on-disk */
#define PG_reclaim	17	/* To be reclaimed asap */
#define PG_nosave_free	18	/* Free, should not be written */
#define PG_buddy	19	/* Page is free, on buddy lists */


SetPageUptodate 原子设置bit PG_uptodate 状态为1,表示该页被更新

/* Atomically set the PG_uptodate bit in the flags word of @page. */
#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags)


ClearPageUptodate 原子设置bit PG_uptodate 状态为0,表示页没有被更新

/* Atomically clear the PG_uptodate bit in the flags word of @page. */
#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)

TestSetPageLocked 原子地设置 page 的 locked 状态,并返回改变前的原来状态
 
/*
 * Set the PG_locked bit in the flags word of @page and evaluate to the
 * value the bit had before the call (non-zero if it was already set).
 */
#define TestSetPageLocked(page)		\
	test_and_set_bit(PG_locked, &(page)->flags)


__lock_page 函数

/*
 * __lock_page - wait for the PG_locked bit of a page and take it.
 * @page: the page to lock.
 *
 * Declares a bit-wait entry keyed on PG_locked in page->flags and
 * hands it to __wait_on_bit_lock() on the page's wait queue, with
 * sync_page as the action callback and TASK_UNINTERRUPTIBLE as the
 * sleep state.
 */
void fastcall __lock_page(struct page *page)
{
	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);

	__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
			   TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_page);


将当前进程设置成 TASK_UNINTERRUPTIBLE 状态,并将进程挂到 wait 队列中。如果 PG_locked 的状态为 1,则触发 sync_page 方法,只有在 sync_page 方法中才会调用 schedule() 调度当前进程,直到 PG_locked 的状态为 0。注意当 __wait_on_bit_lock 执行完的时候 PG_locked 仍然是 1,因为 __wait_on_bit_lock 是用 test_and_set_bit 来进行 while 条件判断的:退出循环时该位已被当前进程重新置为 1,也就是当前进程获得了页锁。最后将进程设置成 TASK_RUNNING 状态,并把该进程从 wait 队列中移除。
 

unlock_page 函数

/*
 * unlock_page - release the PG_locked bit of a page and wake waiters.
 * @page: the page to unlock; it must currently be locked.
 *
 * Clears PG_locked between two memory barriers, then wakes whoever is
 * waiting on that bit. Unlocking a page that was not locked is a bug
 * and triggers BUG().
 */
void fastcall unlock_page(struct page *page)
{
	smp_mb__before_clear_bit();
	/* The bit must have been set; clearing an unlocked page is fatal. */
	if (!TestClearPageLocked(page))
		BUG();
	smp_mb__after_clear_bit();
	wake_up_page(page, PG_locked);
}
EXPORT_SYMBOL(unlock_page);


将 PG_locked 的状态置为 0,然后遍历等待队列,执行唤醒函数

/*
 * __wake_up_common - run the wake function of each waiter on a queue.
 * @q:            wait queue head whose task_list is walked.
 * @mode:         passed through to each waiter's wake function.
 * @nr_exclusive: number of exclusive waiters to wake before stopping.
 * @sync:         passed through to each waiter's wake function.
 * @key:          passed through to each waiter's wake function.
 *
 * Walks the queue and calls every entry's ->func. The walk stops early
 * once nr_exclusive entries that both carry WQ_FLAG_EXCLUSIVE and
 * whose ->func returned non-zero have been processed.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			     int nr_exclusive, int sync, void *key)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, &q->task_list) {
		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
		/*
		 * Snapshot the flags before calling ->func; presumably the
		 * callback may dequeue curr, so it is not touched afterwards.
		 */
		unsigned flags = curr->flags;

		if (curr->func(curr, mode, sync, key) &&
				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}


其中func的定义是

.func   = autoremove_wake_function, 

在autoremove_wake_function里,调用sched.c 的default_wake_function -> try_to_wake_up

将等待队列里的线程状态置为 TASK_RUNNING 并放置到运行队列中去。