1,简单介绍
在2.6.16内核的netfilter中,netfilter一个重大修正思想就是将netfilter作为一个协议无关的框架,表现在内核结构树中单独建立net/netfilter目录,而在以前netfilter是附着在各个协议目录之下的,如在net/ipv4, net/ipv6等目录下。现在虽然各协议目录下也都有,但主要是处理和各协议相关的东西了,而一些共同的东西,就都放在net/netfilter目录下,文件名也有所改变,虽然现在还不是很独立,比如说net/netfilter/nf_conntrack_core.c和net/ipv4/netfilter/ip_conntrack_core.c就仍然很相似,让人觉得没必要那么分,但不少和协议无关的匹配和目标模块已经和协议分离,只在此目录下有,而不放在协议目录下了。
在net/netfilter下的匹配和目标模块文件名称都以“xt_”打头,如 xt_comment.c,xt_policy.c等
目标模块有:
	xt_CLASSIFY.c
	xt_NFQUEUE.c
	xt_NOTRACK.c
为了和iptables兼容(因为iptables找模块文件前缀是按“ipt_”或“ip6t_”找的),这些文件中增加了一个新的宏定义:MODULE_ALIAS,来表示模块的别名。
	如在xt_limit.c中就如下定义:
	/* Expose this module under the legacy per-protocol names ("ipt_" and
	 * "ip6t_" prefixes) in addition to its own xt_ name. */
	MODULE_ALIAS("ipt_limit");
	MODULE_ALIAS("ip6t_limit");
	在include/linux/netfilter_ipv4/ip_tables.h中进行了以下定义:
	/* Compatibility names: the legacy ipt_* type names expand to the
	 * protocol-independent xt_* structures. */
	#define ipt_match xt_match
	#define ipt_target xt_target
	#define ipt_table xt_table
	
	2,代码分析
	以下是新匹配和目标模块的结构定义:
	/*
	 * Descriptor for one match extension.  xt_register_match() links it
	 * into the match list of its address family, and xt_check_match()
	 * validates a rule against the matchsize/table/hooks/proto constraints
	 * declared here.
	 */
	struct xt_match
	{
	/* Linkage into the per-family list of registered matches. */
	struct list_head list;
	/* Extension name (printed in xt_check_match() diagnostics). */
const char name[XT_FUNCTION_MAXNAMELEN-1];
	/* Return true or false: return FALSE and set *hotdrop = 1 to
	force immediate packet drop. */
	/* Arguments changed since 2.6.9, as this must now handle
	non-linear skb, using skb_header_pointer and
	skb_ip_make_writable. */
	int (*match)(const struct sk_buff *skb,
	 const struct net_device *in,
	 const struct net_device *out,
	 const struct xt_match *match,
	 const void *matchinfo,
	 int offset,
	 unsigned int protoff,
	 int *hotdrop);
	/* Called when user tries to insert an entry of this type. */
	/* Should return true or false. */
	int (*checkentry)(const char *tablename,
	const void *ip,
	const struct xt_match *match,
	void *matchinfo,
	unsigned int matchinfosize,
	unsigned int hook_mask);
	/* Called when entry of this type deleted. */
	void (*destroy)(const struct xt_match *match, void *matchinfo,
	  unsigned int matchinfosize);
	/* Called when userspace align differs from kernel space one */
	int (*compat)(void *match, void **dstptr, int *size, int convert);
	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;
	/* If non-NULL, the only table this match may be used in
	(compared with strcmp() in xt_check_match()). */
	char *table;
	/* Size of the match data; XT_ALIGN(matchsize) must equal the size
	supplied with the rule. */
	unsigned int matchsize;
	/* If non-zero, bitmask of the hooks this match may be used from. */
	unsigned int hooks;
	/* If non-zero, the only protocol this match is valid for; inverted
	protocol selections are rejected as well. */
	unsigned short proto;
	/* Address family; selects the xt[] slot used at registration. */
	unsigned short family;
	/* NOTE(review): presumably the extension revision number; no code
	shown here reads it. */
	u_int8_t revision;
	};
	/*
	 * Descriptor for one target extension.  xt_register_target() links it
	 * into the target list of its address family, and xt_check_target()
	 * validates a rule against the targetsize/table/hooks/proto
	 * constraints declared here.
	 */
	struct xt_target
	{
	/* Linkage into the per-family list of registered targets. */
	struct list_head list;
	/* Extension name (printed in xt_check_target() diagnostics). */
const char name[XT_FUNCTION_MAXNAMELEN-1];
	/* Returns verdict. Argument order changed since 2.6.9, as this
	must now handle non-linear skbs, using skb_copy_bits and
	skb_ip_make_writable. */
	unsigned int (*target)(struct sk_buff **pskb,
	 const struct net_device *in,
	 const struct net_device *out,
	 unsigned int hooknum,
	 const struct xt_target *target,
	 const void *targinfo,
	 void *userdata);
	/* Called when user tries to insert an entry of this type:
	hook_mask is a bitmask of hooks from which it can be
	called. */
	/* Should return true or false. */
	int (*checkentry)(const char *tablename,
	const void *entry,
	const struct xt_target *target,
	void *targinfo,
	unsigned int targinfosize,
	unsigned int hook_mask);
	/* Called when entry of this type deleted. */
	void (*destroy)(const struct xt_target *target, void *targinfo,
	  unsigned int targinfosize);
	/* Called when userspace align differs from kernel space one */
	int (*compat)(void *target, void **dstptr, int *size, int convert);
	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;
	/* If non-NULL, the only table this target may be used in
	(compared with strcmp() in xt_check_target()). */
	char *table;
	/* Size of the target data; XT_ALIGN(targetsize) must equal the size
	supplied with the rule. */
	unsigned int targetsize;
	/* If non-zero, bitmask of the hooks this target may be used from. */
	unsigned int hooks;
	/* If non-zero, the only protocol this target is valid for; inverted
	protocol selections are rejected as well. */
	unsigned short proto;
	/* Address family; selects the xt[] slot used at registration. */
	unsigned short family;
	/* NOTE(review): presumably the extension revision number; no code
	shown here reads it. */
	u_int8_t revision;
	};
	/* Furniture shopping... */
	/*
	 * One registered rule table.  xt_register_table() initialises the
	 * lock, installs the rule set in ->private and links the table into
	 * the table list of address family ->af.
	 */
	struct xt_table
	{
	/* Linkage into the per-family list of tables. */
	struct list_head list;
	/* A unique name... */
	char name[XT_TABLE_MAXNAMELEN];
	/* What hooks you will enter on */
	unsigned int valid_hooks;
	/* Lock for the curtain */
	rwlock_t lock;
	/* Man behind the curtain... */
	//struct ip6t_table_info *private;
	/* Untyped so every family can share this structure;
	xt_register_table() reads it back as a struct xt_table_info pointer. */
	void *private;
	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;
	int af;  /* address/protocol family */
	};
	/* The table itself */
	/*
	 * Rule-set storage behind a struct xt_table.  Allocated by
	 * xt_alloc_table_info(), which sets ->size and allocates one
	 * entries[] buffer for every possible CPU.
	 */
	struct xt_table_info
	{
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	/* (xt_register_table() sets it to ->number at registration time.) */
	unsigned int initial_entries;
	/* Entry points and underflows */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];
	/* ipt_entry tables: one per CPU */
	char *entries[NR_CPUS];
	};
	/*
	 * Per-address-family registry: the central structure of x_tables.
	 * One instance exists per protocol family and holds the lists of
	 * registered matches, targets and tables for that family.
	 */
	struct xt_af {
	/* Taken by the registration functions while they modify the lists. */
	struct mutex mutex;
	/* Registered struct xt_match entries. */
	struct list_head match;
	/* Registered struct xt_target entries. */
	struct list_head target;
	/* Registered struct xt_table entries. */
	struct list_head tables;
	/* Only initialised by xt_init() when CONFIG_COMPAT is defined. */
	struct mutex compat_mutex;
	};
	/* Registry array, indexed by address family; allocated with NPROTO
	 * elements in xt_init(). */
	static struct xt_af *xt;
	/*
	 * Module initialisation for the x_tables core.
	 *
	 * Allocates one struct xt_af slot per protocol family (NPROTO slots in
	 * total) and initialises the mutex(es) and the target/match/table list
	 * heads of every slot.
	 *
	 * Returns 0 on success, or -ENOMEM if the array cannot be allocated.
	 */
	static int __init xt_init(void)
	{
		int i;

		/* One registry slot per protocol family. */
		xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
		if (!xt)
			return -ENOMEM;

		for (i = 0; i < NPROTO; i++) {
			/*
			 * xt points to an array, so each slot has to be
			 * addressed as xt[i]; "xt.member" is not valid on a
			 * pointer and would never reach the other slots.
			 */
			mutex_init(&xt[i].mutex);
	#ifdef CONFIG_COMPAT
			mutex_init(&xt[i].compat_mutex);
	#endif
			/* Start with empty target, match and table lists. */
			INIT_LIST_HEAD(&xt[i].target);
			INIT_LIST_HEAD(&xt[i].match);
			INIT_LIST_HEAD(&xt[i].tables);
		}
		return 0;
	}
	目前2.6.16内核中支持了三类协议族,IPv4/IPv6/ARP,在各协议族中查找相应模块用的前缀为:
	/* Per-family name prefix, indexed by address family; the check
	 * functions print it as "<prefix>_tables" in their diagnostics. */
	static const char *xt_prefix[NPROTO] = {
	[AF_INET] = "ip",
	[AF_INET6] = "ip6",
	[NF_ARP] = "arp",
	};
	对应的具体前缀分别为“ipt”、“ip6t”、“arpt”。
而和老的2.4内核的struct ipt_match和struct ipt_target结构的主要区别是增加了compat函数,以及struct module *me参数后面的一系列参数,是和协议相关的,比如limit匹配,分别为ipv4和ipv6定义了匹配结构后,只有family参数不同,一个是AF_INET,另一个是AF_INET6,其他都相同,而挂接时并不会有问题,因为这些模块都分别挂接到不同协议族的链表:
	/* Registration hooks for targets. */
	int
	xt_register_target(struct xt_target *target)
	{
	int ret, af = target->family;
	ret = mutex_lock_interruptible(&xt[af].mutex);
	if (ret != 0)
	 return ret;
	 
	/* 添加 target*/
	list_add(&target->list, &xt[af].target);
	mutex_unlock(&xt[af].mutex);
	return ret;
	}
	int
	xt_register_match(struct xt_match *match)
	{
	int ret, af = match->family;
	ret = mutex_lock_interruptible(&xt[af].mutex);
	if (ret != 0)
	 return ret;
	/* 添加match */
	list_add(&match->list, &xt[af].match);
	mutex_unlock(&xt[af].mutex);
	return ret;
	}
table注册发生在各协议的netfilter中:
	/*
	 * Register an IPv4 table together with its initial rule set.
	 *
	 * @table: table to register.
	 * @repl:  initial rule blob (entries, size, hook entry points and
	 *         underflows).
	 *
	 * A new xt_table_info is allocated, the rule blob is copied into the
	 * current CPU's entries buffer and handed to translate_table(), and
	 * the result is registered through xt_register_table().
	 *
	 * Returns 0 on success, -ENOMEM if the table info cannot be
	 * allocated, or the error reported by translate_table() or
	 * xt_register_table().  On every failure path the new table info is
	 * freed again.
	 */
	int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
	{
		int ret;
		struct xt_table_info *newinfo;
		static struct xt_table_info bootstrap
			= { 0, 0, 0, { 0 }, { 0 }, { } };
		void *loc_cpu_entry;

		newinfo = xt_alloc_table_info(repl->size);
		if (!newinfo)
			return -ENOMEM;

		/* choose the copy on our node/cpu
		 * but dont care of preemption
		 */
		loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
		memcpy(loc_cpu_entry, repl->entries, repl->size);

		ret = translate_table(table->name, table->valid_hooks,
				      newinfo, loc_cpu_entry, repl->size,
				      repl->num_entries,
				      repl->hook_entry,
				      repl->underflow);
		if (ret != 0) {
			xt_free_table_info(newinfo);
			return ret;
		}

		/*
		 * Keep the registration result: returning the stale value
		 * from translate_table() (0 at this point) would report
		 * success even though the table was not registered.
		 */
		ret = xt_register_table(table, &bootstrap, newinfo);
		if (ret != 0) {
			xt_free_table_info(newinfo);
			return ret;
		}

		return 0;
	}
	/*
	 * Allocate a table info structure with one entries buffer of @size
	 * bytes for every possible CPU.
	 *
	 * Buffers of at most PAGE_SIZE bytes come from kmalloc_node(), larger
	 * ones from vmalloc_node(); both are requested on the CPU's own node.
	 *
	 * Returns the new structure, or NULL if the request is larger than
	 * the number of physical pages allows or if any allocation fails
	 * (whatever was allocated so far is released through
	 * xt_free_table_info()).
	 */
	struct xt_table_info *xt_alloc_table_info(unsigned int size)
	{
		struct xt_table_info *info;
		int cpu;

		/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
		if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
			return NULL;

		info = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
		if (info == NULL)
			return NULL;

		info->size = size;

		for_each_possible_cpu(cpu) {
			if (size > PAGE_SIZE)
				info->entries[cpu] = vmalloc_node(size,
							cpu_to_node(cpu));
			else
				info->entries[cpu] = kmalloc_node(size,
							GFP_KERNEL,
							cpu_to_node(cpu));

			if (!info->entries[cpu]) {
				xt_free_table_info(info);
				return NULL;
			}
		}

		return info;
	}
	/*
	 * Register @table with the address family named by table->af.
	 *
	 * @bootstrap: placeholder table info installed first, so that the
	 *             replace step always has something to replace.
	 * @newinfo:   the real rule set, installed via xt_replace_table().
	 *
	 * Returns 0 on success, -EEXIST if a table of the same name is
	 * already registered for the family, the non-zero result of
	 * mutex_lock_interruptible() if the lock could not be taken, or the
	 * error stored by xt_replace_table() when it fails.
	 */
	int xt_register_table(struct xt_table *table,
			      struct xt_table_info *bootstrap,
			      struct xt_table_info *newinfo)
	{
		struct xt_table_info *installed;
		int ret;

		ret = mutex_lock_interruptible(&xt[table->af].mutex);
		if (ret)
			return ret;

		/* Don't autoload: we'd eat our tail... */
		if (list_named_find(&xt[table->af].tables, table->name)) {
			ret = -EEXIST;
			goto unlock;
		}

		/* Simplifies replace_table code. */
		table->private = bootstrap;
		rwlock_init(&table->lock);

		/* On failure xt_replace_table() has already stored the error in ret. */
		if (!xt_replace_table(table, 0, newinfo, &ret))
			goto unlock;

		installed = table->private;
		duprintf("table->private->number = %u\n", installed->number);

		/* Remember how many entries the table started out with. */
		installed->initial_entries = installed->number;

		list_prepend(&xt[table->af].tables, table);
		ret = 0;

	unlock:
		mutex_unlock(&xt[table->af].mutex);
		return ret;
	}
	但在进行实际匹配目标查找时会进行名字、协议族、表名、挂接点、协议等的比较,如匹配的检查
	int xt_check_match(const struct xt_match *match, unsigned short family,
	unsigned int size, const char *table, unsigned int hook_mask,
	 unsigned short proto, int inv_proto)
	{
	if (XT_ALIGN(match->matchsize) != size) {
	 printk("%s_tables: %s match: invalid size %Zu != %u\n",
	xt_prefix[family], match->name,
	XT_ALIGN(match->matchsize), size);
	 return -EINVAL;
	}
	if (match->table && strcmp(match->table, table)) {
	 printk("%s_tables: %s match: only valid in %s table, not %s\n",
	xt_prefix[family], match->name, match->table, table);
	 return -EINVAL;
	}
	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
	 printk("%s_tables: %s match: bad hook_mask %u\n",
	xt_prefix[family], match->name, hook_mask);
	 return -EINVAL;
	}
	if (match->proto && (match->proto != proto || inv_proto)) {
	 printk("%s_tables: %s match: only valid for protocol %u\n",
	xt_prefix[family], match->name, match->proto);
	 return -EINVAL;
	}
	return 0;
	}
	int xt_check_target(const struct xt_target *target, unsigned short family,
	 unsigned int size, const char *table, unsigned int hook_mask,
	 unsigned short proto, int inv_proto)
	{
	if (XT_ALIGN(target->targetsize) != size) {
	 printk("%s_tables: %s target: invalid size %Zu != %u\n",
	xt_prefix[family], target->name,
	XT_ALIGN(target->targetsize), size);
	 return -EINVAL;
	}
	if (target->table && strcmp(target->table, table)) {
	 printk("%s_tables: %s target: only valid in %s table, not %s\n",
	xt_prefix[family], target->name, target->table, table);
	 return -EINVAL;
	}
	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
	 printk("%s_tables: %s target: bad hook_mask %u\n",
	xt_prefix[family], target->name, hook_mask);
	 return -EINVAL;
	}
	if (target->proto && (target->proto != proto || inv_proto)) {
	 printk("%s_tables: %s target: only valid for protocol %u\n",
	xt_prefix[family], target->name, target->proto);
	 return -EINVAL;
	}
	return 0;
	}
/* 下面是ipsec的policy检验过程处理 */
	/*
	 * IPv4 registration record for the "policy" match.  The match data is
	 * a struct xt_policy_info; the match and checkentry callbacks are
	 * defined elsewhere in the module.
	 *
	 * .family is given once only: the original initialised it twice with
	 * the same value, which is redundant and triggers override-init
	 * warnings.
	 */
	static struct xt_match policy_match = {
		.name		= "policy",
		.family		= AF_INET,
		.match		= match,
		.matchsize	= sizeof(struct xt_policy_info),
		.checkentry	= checkentry,
		.me		= THIS_MODULE,
	};
	/*
	 * Module init: register the IPv4 and IPv6 variants of the policy
	 * match.  If the second registration fails, the first one is undone so
	 * that nothing stays registered.
	 *
	 * Returns 0 on success or the error from xt_register_match().
	 */
	static int __init init(void)
	{
		int err = xt_register_match(&policy_match);

		if (err == 0) {
			err = xt_register_match(&policy6_match);
			if (err != 0)
				xt_unregister_match(&policy_match);
		}
		return err;
	}
	/*
	 * Match the transformations recorded in the security path of an
	 * inbound packet against the policy elements in @info.
	 *
	 * In strict mode (XT_POLICY_MATCH_STRICT set) the number of elements
	 * must equal the security path length and every state has to match
	 * its element; otherwise a single state matching element 0 suffices.
	 *
	 * Returns -1 if the packet has no security path, 1 on a match and 0
	 * otherwise.
	 */
	static int
	match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
			unsigned short family)
	{
		const struct xt_policy_elem *e;
		struct sec_path *sp = skb->sp;
		int strict = info->flags & XT_POLICY_MATCH_STRICT;
		int i, pos;

		if (sp == NULL)
			return -1;
		if (strict && info->len != sp->len)
			return 0;

		for (i = sp->len - 1; i >= 0; i--) {
			/*
			 * NOTE(review): in strict mode this yields 0, -1, -2, ...
			 * as i counts down, i.e. a non-positive index into
			 * info->pol[]; kept as in the original -- confirm the
			 * intended element order.
			 */
			pos = strict ? i - sp->len + 1 : 0;
			if (pos >= info->len)
				return 0;
			e = &info->pol[pos];

			/*
			 * Check the i-th state of the security path.  The index
			 * is required: without it the loop variable never
			 * selects a state.  NOTE(review): assumes sec_path
			 * keeps its states in an xvec[] array -- confirm
			 * against the kernel version in use.
			 */
			if (match_xfrm_state(sp->xvec[i], e, family)) {
				if (!strict)
					return 1;
			} else if (strict)
				return 0;
		}

		return strict ? 1 : 0;
	}

