http://blog.chinaunix.net/uid-26859697-id-5495667.html

前面分析了<span style="-ms-word-wrap: break-word;">slub</span>分配算法的初始化,继续分析<span style="-ms-word-wrap: break-word;">slub</span>分配算法的<span style="-ms-word-wrap: break-word;">slab</span>创建过程。

Slub分配算法创建<span style="-ms-word-wrap: break-word;">slab</span>类型,其函数入口为<span style="-ms-word-wrap: break-word;">kmem_cache_create()</span>,具体实现:


1. 【file:/mm/slab_common.c】
2. struct kmem_cache *
3. kmem_cache_create(const char *name, size_t size, size_t align,
4.           unsigned long flags, void (*ctor)(void *))
5. {
6.     return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
7. }
&nbsp;

该函数的入参<span style="-ms-word-wrap: break-word;">name</span>表示要创建的<span style="-ms-word-wrap: break-word;">slab</span>类型名称,<span style="-ms-word-wrap: break-word;">size</span>为该<span style="-ms-word-wrap: break-word;">slab</span>每个对象的大小,<span style="-ms-word-wrap: break-word;">align</span>则是其内存对齐的标准, <span style="-ms-word-wrap: break-word;">flags</span>则表示申请内存的标识,而<span style="-ms-word-wrap: break-word;">ctor</span>则是初始化每个对象的构造函数,至于实现则是简单地封装了<span style="-ms-word-wrap: break-word;">kmem_cache_create_memcg()</span>。

继而分析<span style="-ms-word-wrap: break-word;">kmem_cache_create_memcg()</span>的实现:


1. 【file:/mm/slab_common.c】
2. /
3.   kmem_cache_create - Create a cache.
4.   @name: A string which is used in /proc/slabinfo to identify this cache.
5.   @size: The size of objects to be created in this cache.
6.   @align: The required alignment for the objects.
7.   @flags: SLAB flags
8.   @ctor: A constructor for the objects.
9.  
10.   Returns a ptr to the cache on success, NULL on failure.
11.   Cannot be called within a interrupt, but can be interrupted.
12.   The @ctor is run when new pages are allocated by the cache.
13.  
14.   The flags are
15.  
16.   %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
17.   to catch references to uninitialised memory.
18.  
19.   %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
20.   for buffer overruns.
21.  
22.   %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
23.   cacheline. This can be beneficial if you're counting cycles as closely
24.   as davem.
25.  /
26.  
27. struct kmem_cache
28. kmem_cache_create_memcg(struct mem_cgroup memcg, const char name, size_t size,
29.             size_t align, unsigned long flags, void (ctor)(void ),
30.             struct kmem_cache parent_cache)
31. {
32.     struct kmem_cache s = NULL;
33.     int err;
34.  
35.     get_online_cpus();
36.     mutex_lock(&slab_mutex);
37.  
38.     err = kmem_cache_sanity_check(memcg, name, size);
39.     if (err)
40.         goto out_unlock;
41.  
42.     if (memcg) {
43.         /
44.           Since per-memcg caches are created asynchronously on first
45.           allocation (see memcg_kmem_get_cache()), several threads can
46.           try to create the same cache, but only one of them may
47.           succeed. Therefore if we get here and see the cache has
48.           already been created, we silently return NULL.
49.          /
50.         if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg)))
51.             goto out_unlock;
52.     }
53.  
54.     /
55.       Some allocators will constraint the set of valid flags to a subset
56.       of all flags. We expect them to define CACHE_CREATE_MASK in this
57.       case, and we'll just provide them with a sanitized version of the
58.       passed flags.
59.      /
60.     flags &= CACHE_CREATE_MASK;
61.  
62.     s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
63.     if (s)
64.         goto out_unlock;
65.  
66.     err = -ENOMEM;
67.     s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
68.     if (!s)
69.         goto out_unlock;
70.  
71.     s->object_size = s->size = size;
72.     s->align = calculate_alignment(flags, align, size);
73.     s->ctor = ctor;
74.  
75.     s->name = kstrdup(name, GFP_KERNEL);
76.     if (!s->name)
77.         goto out_free_cache;
78.  
79.     err = memcg_alloc_cache_params(memcg, s, parent_cache);
80.     if (err)
81.         goto out_free_cache;
82.  
83.     err = __kmem_cache_create(s, flags);
84.     if (err)
85.         goto out_free_cache;
86.  
87.     s->refcount = 1;
88.     list_add(&s->list, &slab_caches);
89.     memcg_register_cache(s);
90.  
91. out_unlock:
92.     mutex_unlock(&slab_mutex);
93.     put_online_cpus();
94.  
95.     if (err) {
96.         /
97.           There is no point in flooding logs with warnings or
98.           especially crashing the system if we fail to create a cache
99.           for a memcg. In this case we will be accounting the memcg
100.           allocation to the root cgroup until we succeed to create its
101.           own cache, but it isn't that critical.
102.          */
103.         if (!memcg)
104.             return NULL;
105.  
106.         if (flags & SLAB_PANIC)
107.             panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
108.                 name, err);
109.         else {
110.             printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
111.                 name, err);
112.             dump_stack();
113.         }
114.         return NULL;
115.     }
116.     return s;
117.  
118. out_free_cache:
119.     memcg_free_cache_params(s);
120.     kfree(s->name);
121.     kmem_cache_free(kmem_cache, s);
122.     goto out_unlock;
123. }
&nbsp;

函数入口处调用的<span style="-ms-word-wrap: break-word;">get_online_cpus()</span>与<span style="-ms-word-wrap: break-word;">put_online_cpus()</span>是配对使用的,用于对<span style="-ms-word-wrap: break-word;">cpu_online_map</span>的加解锁;接下来的<span style="-ms-word-wrap: break-word;">kmem_cache_sanity_check()</span>主要是用于合法性检查,检查指定名称的<span style="-ms-word-wrap: break-word;">slab</span>是否已经创建,仅在<span style="-ms-word-wrap: break-word;">CONFIG_DEBUG_VM</span>开启的时候起作用;如果<span style="-ms-word-wrap: break-word;">memcg</span>不为空指针,表示创建的<span style="-ms-word-wrap: break-word;">slab</span>与<span style="-ms-word-wrap: break-word;">memcg</span>关联,此外由于每<span style="-ms-word-wrap: break-word;">memcg</span>的<span style="-ms-word-wrap: break-word;">cache</span>会在首次分配的时候异步创建,多个线程可能会尝试创建同样的<span style="-ms-word-wrap: break-word;">cache</span>,但只有一个会创建成功,那么如果代码执行到此处调用<span style="-ms-word-wrap: break-word;">cache_from_memcg_idx()</span>检查到<span style="-ms-word-wrap: break-word;">cache</span>已经被创建(即<span style="-ms-word-wrap: break-word;">cache_from_memcg_idx()</span>返回非空指针),那么<span style="-ms-word-wrap: break-word;">kmem_cache_create_memcg()</span>将直接跳转至<span style="-ms-word-wrap: break-word;">out_unlock</span>并静默地返回<span style="-ms-word-wrap: break-word;">NULL</span>;再往下的<span style="-ms-word-wrap: break-word;">__kmem_cache_alias()</span>,该函数检查已创建的<span style="-ms-word-wrap: break-word;">slab</span>是否存在与当前想要创建的<span style="-ms-word-wrap: break-word;">slab</span>的对象大小相匹配的,如果有则通过别名合并到一个缓存中进行访问。

看一下<span style="-ms-word-wrap: break-word;">__kmem_cache_alias()</span>具体实现:


1. 【file:/mm/slub.c】
2. struct kmem_cache
3. __kmem_cache_alias(struct mem_cgroup memcg, const char name, size_t size,
4.            size_t align, unsigned long flags, void (ctor)(void ))
5. {
6.     struct kmem_cache s;
7.  
8.     s = find_mergeable(memcg, size, align, flags, name, ctor);
9.     if (s) {
10.         s->refcount++;
11.         /
12.           Adjust the object sizes so that we clear
13.           the complete object on kzalloc.
14.          /
15.         s->object_size = max(s->object_size, (int)size);
16.         s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
17.  
18.         if (sysfs_slab_alias(s, name)) {
19.             s->refcount--;
20.             s = NULL;
21.         }
22.     }
23.  
24.     return s;
25. }
&nbsp;

该函数主要通过<span style="-ms-word-wrap: break-word;">find_mergeable()</span>查找可合并<span style="-ms-word-wrap: break-word;">slab</span>的<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构,如果找到的情况下,将<span style="-ms-word-wrap: break-word;">kmem_cache</span>的引用计数作自增,同时更新<span style="-ms-word-wrap: break-word;">kmem_cache</span>的对象大小及元数据偏移量,最后调用<span style="-ms-word-wrap: break-word;">sysfs_slab_alias()</span>在<span style="-ms-word-wrap: break-word;">sysfs</span>中添加别号。

进一步分析<span style="-ms-word-wrap: break-word;">find_mergeable()</span>函数的具体实现:


1. 【file:/mm/slub.c】
2. static struct kmem_cache find_mergeable(struct mem_cgroup memcg, size_t size,
3.         size_t align, unsigned long flags, const char name,
4.         void (ctor)(void ))
5. {
6.     struct kmem_cache s;
7.  
8.     if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
9.         return NULL;
10.  
11.     if (ctor)
12.         return NULL;
13.  
14.     size = ALIGN(size, sizeof(void ));
15.     align = calculate_alignment(flags, align, size);
16.     size = ALIGN(size, align);
17.     flags = kmem_cache_flags(size, flags, name, NULL);
18.  
19.     list_for_each_entry(s, &slab_caches, list) {
20.         if (slab_unmergeable(s))
21.             continue;
22.  
23.         if (size > s->size)
24.             continue;
25.  
26.         if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
27.                 continue;
28.         /
29.           Check if alignment is compatible.
30.           Courtesy of Adrian Drzewiecki
31.          /
32.         if ((s->size & ~(align - 1)) != s->size)
33.             continue;
34.  
35.         if (s->size - size >= sizeof(void ))
36.             continue;
37.  
38.         if (!cache_match_memcg(s, memcg))
39.             continue;
40.  
41.         return s;
42.     }
43.     return NULL;
44. }
&nbsp;

该查找函数先获取将要创建的<span style="-ms-word-wrap: break-word;">slab</span>的内存对齐值及创建<span style="-ms-word-wrap: break-word;">slab</span>的内存标识。接着经由<span style="-ms-word-wrap: break-word;">list_for_each_entry()</span>遍历整个<span style="-ms-word-wrap: break-word;">slab_caches</span>链表;通过<span style="-ms-word-wrap: break-word;">slab_unmergeable()</span>判断遍历的<span style="-ms-word-wrap: break-word;">kmem_cache</span>是否允许合并,主要依据是缓冲区属性的标识及<span style="-ms-word-wrap: break-word;">slab</span>的对象是否有特定的初始化构造函数,如果不允许合并则跳过;判断当前的<span style="-ms-word-wrap: break-word;">kmem_cache</span>的对象大小是否小于要查找的,是则跳过;再接着<span style="-ms-word-wrap: break-word;">if ((flags &amp; SLUB_MERGE_SAME) != (s-&gt;flags &amp; SLUB_MERGE_SAME))&nbsp; </span>判断当前的<span style="-ms-word-wrap: break-word;">kmem_cache</span>与查找的标识类型是否一致,不是则跳过;往下就是<span style="-ms-word-wrap: break-word;">if ((s-&gt;size &amp; ~(align - 1)) != s-&gt;size)</span>判断对齐量是否匹配,<span style="-ms-word-wrap: break-word;">if (s-&gt;size - size &gt;= sizeof(void *))</span>判断大小相差是否达到或超过指针类型大小(是则跳过),<span style="-ms-word-wrap: break-word;">if (!cache_match_memcg(s, memcg))</span>判断<span style="-ms-word-wrap: break-word;">memcg</span>是否匹配。经由多层判断检验,如果找到可合并的<span style="-ms-word-wrap: break-word;">slab</span>,则将其返回,否则返回<span style="-ms-word-wrap: break-word;">NULL</span>。

回到<span style="-ms-word-wrap: break-word;">kmem_cache_create_memcg()</span>,如果<span style="-ms-word-wrap: break-word;">__kmem_cache_alias()</span>找到了可合并的<span style="-ms-word-wrap: break-word;">slab</span>,则将其<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构返回。否则将会创建新的<span style="-ms-word-wrap: break-word;">slab</span>,其将通过<span style="-ms-word-wrap: break-word;">kmem_cache_zalloc()</span>申请一个<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构对象,然后初始化该结构的对象大小、对齐值及对象的初始化构造函数等数据成员信息;其中<span style="-ms-word-wrap: break-word;">slab</span>的名称将通过<span style="-ms-word-wrap: break-word;">kstrdup()</span>申请空间并拷贝存储至空间中;接着的<span style="-ms-word-wrap: break-word;">memcg_alloc_cache_params()</span>主要是申请<span style="-ms-word-wrap: break-word;">kmem_cache</span>的<span style="-ms-word-wrap: break-word;">memcg_params</span>成员结构空间并初始化;至于往下的<span style="-ms-word-wrap: break-word;">__kmem_cache_create()</span>则主要是申请并创建<span style="-ms-word-wrap: break-word;">slub</span>的管理结构及<span style="-ms-word-wrap: break-word;">kmem_cache</span>其他数据的初始化,具体后面将进行详细分析。

接着往下的<span style="-ms-word-wrap: break-word;">out_unlock</span>标签主要是用于处理<span style="-ms-word-wrap: break-word;">slab</span>创建的收尾工作,如果创建失败,将会进入<span style="-ms-word-wrap: break-word;">err</span>分支进行失败处理;最后的<span style="-ms-word-wrap: break-word;">out_free_cache</span>标签主要是用于初始化<span style="-ms-word-wrap: break-word;">kmem_cache</span>失败时将申请的空间进行释放,然后跳转至<span style="-ms-word-wrap: break-word;">out_unlock</span>进行失败后处理。

具体看一下<span style="-ms-word-wrap: break-word;">__kmem_cache_create()</span>实现:


1. 【file:/mm/slub.c】
2. int __kmem_cache_create(struct kmem_cache s, unsigned long flags)
3. {
4.     int err;
5.  
6.     err = kmem_cache_open(s, flags);
7.     if (err)
8.         return err;
9.  
10.     /* Mutex is not taken during early boot */
11.     if (slab_state <= UP)
12.         return 0;
13.  
14.     memcg_propagate_slab_attrs(s);
15.     mutex_unlock(&slab_mutex);
16.     err = sysfs_slab_add(s);
17.     mutex_lock(&slab_mutex);
18.  
19.     if (err)
20.         kmem_cache_close(s);
21.  
22.     return err;
23. }
&nbsp;

&nbsp;&nbsp;&nbsp; 其中里面调用的<span style="-ms-word-wrap: break-word;">kmem_cache_open()</span>主要是初始化<span style="-ms-word-wrap: break-word;">slub</span>结构。而后在调用<span style="-ms-word-wrap: break-word;">sysfs_slab_add()</span>前会先解锁<span style="-ms-word-wrap: break-word;">slab_mutex</span>,这主要是因为<span style="-ms-word-wrap: break-word;">sysfs</span>函数会做大量的事情,为了避免调用<span style="-ms-word-wrap: break-word;">sysfs</span>函数中持有该锁从而导致阻塞等情况;而<span style="-ms-word-wrap: break-word;">sysfs_slab_add()</span>主要是将<span style="-ms-word-wrap: break-word;">kmem_cache</span>添加到<span style="-ms-word-wrap: break-word;">sysfs</span>。如果出错,将会通过<span style="-ms-word-wrap: break-word;">kmem_cache_close()</span>将<span style="-ms-word-wrap: break-word;">slub</span>销毁。

&nbsp;&nbsp;&nbsp; 深入分析<span style="-ms-word-wrap: break-word;">kmem_cache_open()</span>实现:


1. 【file:/mm/slub.c】
2. static int kmem_cache_open(struct kmem_cache s, unsigned long flags)
3. {
4.     s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
5.     s->reserved = 0;
6.  
7.     if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
8.         s->reserved = sizeof(struct rcu_head);
9.  
10.     if (!calculate_sizes(s, -1))
11.         goto error;
12.     if (disable_higher_order_debug) {
13.         /
14.           Disable debugging flags that store metadata if the min slab
15.           order increased.
16.          /
17.         if (get_order(s->size) > get_order(s->object_size)) {
18.             s->flags &= ~DEBUG_METADATA_FLAGS;
19.             s->offset = 0;
20.             if (!calculate_sizes(s, -1))
21.                 goto error;
22.         }
23.     }
24.  
25. #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
26.     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
27.     if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
28.         / Enable fast mode /
29.         s->flags |= __CMPXCHG_DOUBLE;
30. #endif
31.  
32.     /
33.       The larger the object size is, the more pages we want on the partial
34.       list to avoid pounding the page allocator excessively.
35.      /
36.     set_min_partial(s, ilog2(s->size) / 2);
37.  
38.     /
39.       cpu_partial determined the maximum number of objects kept in the
40.       per cpu partial lists of a processor.
41.      
42.       Per cpu partial lists mainly contain slabs that just have one
43.       object freed. If they are used for allocation then they can be
44.       filled up again with minimal effort. The slab will never hit the
45.       per node partial lists and therefore no locking will be required.
46.      
47.       This setting also determines
48.      
49.       A) The number of objects from per cpu partial slabs dumped to the
50.       per node list when we reach the limit.
51.       B) The number of objects in cpu partial slabs to extract from the
52.       per node list when we run out of per cpu objects. We only fetch
53.       50% to keep some capacity around for frees.
54.      /
55.     if (!kmem_cache_has_cpu_partial(s))
56.         s->cpu_partial = 0;
57.     else if (s->size >= PAGE_SIZE)
58.         s->cpu_partial = 2;
59.     else if (s->size >= 1024)
60.         s->cpu_partial = 6;
61.     else if (s->size >= 256)
62.         s->cpu_partial = 13;
63.     else
64.         s->cpu_partial = 30;
65.  
66. #ifdef CONFIG_NUMA
67.     s->remote_node_defrag_ratio = 1000;
68. #endif
69.     if (!init_kmem_cache_nodes(s))
70.         goto error;
71.  
72.     if (alloc_kmem_cache_cpus(s))
73.         return 0;
74.  
75.     free_kmem_cache_nodes(s);
76. error:
77.     if (flags & SLAB_PANIC)
78.         panic("Cannot create slab %s size=%lu realsize=%u "
79.             "order=%u offset=%u flags=%lx\n",
80.             s->name, (unsigned long)s->size, s->size,
81.             oo_order(s->oo), s->offset, flags);
82.     return -EINVAL;
83. }
&nbsp;

&nbsp;&nbsp; 这里面的<span style="-ms-word-wrap: break-word;">kmem_cache_flags()</span>用于获取设置缓存描述的标识,用于区分<span style="-ms-word-wrap: break-word;">slub</span>是否开启了调试;继而调用<span style="-ms-word-wrap: break-word;">calculate_sizes()</span>计算并初始化<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构的各项数据。

&nbsp;&nbsp; 具体<span style="-ms-word-wrap: break-word;">calculate_sizes()</span>实现:


1. 【file:/mm/slub.c】
2. /
3.   calculate_sizes() determines the order and the distribution of data within
4.   a slab object.
5.  /
6. static int calculate_sizes(struct kmem_cache s, int forced_order)
7. {
8.     unsigned long flags = s->flags;
9.     unsigned long size = s->object_size;
10.     int order;
11.  
12.     /
13.       Round up object size to the next word boundary. We can only
14.       place the free pointer at word boundaries and this determines
15.       the possible location of the free pointer.
16.      /
17.     size = ALIGN(size, sizeof(void ));
18.  
19. #ifdef CONFIG_SLUB_DEBUG
20.     /
21.       Determine if we can poison the object itself. If the user of
22.       the slab may touch the object after free or before allocation
23.       then we should never poison the object itself.
24.      /
25.     if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
26.             !s->ctor)
27.         s->flags |= __OBJECT_POISON;
28.     else
29.         s->flags &= ~__OBJECT_POISON;
30.  
31.  
32.     /
33.       If we are Redzoning then check if there is some space between the
34.       end of the object and the free pointer. If not then add an
35.       additional word to have some bytes to store Redzone information.
36.      /
37.     if ((flags & SLAB_RED_ZONE) && size == s->object_size)
38.         size += sizeof(void );
39. #endif
40.  
41.     /
42.       With that we have determined the number of bytes in actual use
43.       by the object. This is the potential offset to the free pointer.
44.      /
45.     s->inuse = size;
46.  
47.     if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
48.         s->ctor)) {
49.         /
50.           Relocate free pointer after the object if it is not
51.           permitted to overwrite the first word of the object on
52.           kmem_cache_free.
53.          
54.           This is the case if we do RCU, have a constructor or
55.           destructor or are poisoning the objects.
56.          /
57.         s->offset = size;
58.         size += sizeof(void );
59.     }
60.  
61. #ifdef CONFIG_SLUB_DEBUG
62.     if (flags & SLAB_STORE_USER)
63.         /
64.           Need to store information about allocs and frees after
65.           the object.
66.          /
67.         size += 2 * sizeof(struct track);
68.  
69.     if (flags & SLAB_RED_ZONE)
70.         /
71.           Add some empty padding so that we can catch
72.           overwrites from earlier objects rather than let
73.           tracking information or the free pointer be
74.           corrupted if a user writes before the start
75.           of the object.
76.          /
77.         size += sizeof(void );
78. #endif
79.  
80.     /
81.       SLUB stores one object immediately after another beginning from
82.       offset 0. In order to align the objects we have to simply size
83.       each object to conform to the alignment.
84.      /
85.     size = ALIGN(size, s->align);
86.     s->size = size;
87.     if (forced_order >= 0)
88.         order = forced_order;
89.     else
90.         order = calculate_order(size, s->reserved);
91.  
92.     if (order < 0)
93.         return 0;
94.  
95.     s->allocflags = 0;
96.     if (order)
97.         s->allocflags |= __GFP_COMP;
98.  
99.     if (s->flags & SLAB_CACHE_DMA)
100.         s->allocflags |= GFP_DMA;
101.  
102.     if (s->flags & SLAB_RECLAIM_ACCOUNT)
103.         s->allocflags |= __GFP_RECLAIMABLE;
104.  
105.     /
106.       Determine the number of objects per slab
107.      /
108.     s->oo = oo_make(order, size, s->reserved);
109.     s->min = oo_make(get_order(size), size, s->reserved);
110.     if (oo_objects(s->oo) > oo_objects(s->max))
111.         s->max = s->oo;
112.  
113.     return !!oo_objects(s->oo);
114. }
&nbsp;

最前面的<span style="-ms-word-wrap: break-word;">ALIGN(size, sizeof(void *))</span>是用于将<span style="-ms-word-wrap: break-word;">slab</span>对象的大小向上舍入到与<span style="-ms-word-wrap: break-word;">sizeof(void *)</span>指针大小对齐,这是因为空闲指针只能存放在字对齐的边界上;在开启<span style="-ms-word-wrap: break-word;">CONFIG_SLUB_DEBUG</span>配置的情况下,接下来的<span style="-ms-word-wrap: break-word;">if ((flags &amp; SLAB_POISON) &amp;&amp; !(flags &amp; SLAB_DESTROY_BY_RCU) &amp;&amp; !s-&gt;ctor)</span>则是为了判断用户是否会在对象释放后或者申请前访问,以设定<span style="-ms-word-wrap: break-word;">SLUB</span>的调试功能是否使能,也就是决定了对<span style="-ms-word-wrap: break-word;">poison</span>对象是否进行修改操作,其主要是为了通过将对象填充入特定的字符数据以实现对内存写越界进行调测,其填入的字符有:

#define&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; POISON_INUSE&nbsp;&nbsp; 0x5a&nbsp;&nbsp;&nbsp; /* for use-uninitialised poisoning */

#define&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; POISON_FREE&nbsp;&nbsp;&nbsp; 0x6b&nbsp;&nbsp;&nbsp; /* for use-after-free poisoning */

#define&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; POISON_END&nbsp;&nbsp;&nbsp;&nbsp; 0xa5&nbsp;&nbsp;&nbsp; /* end-byte of poisoning */

再接着的<span style="-ms-word-wrap: break-word;">if ((flags &amp; SLAB_RED_ZONE) &amp;&amp; size == s-&gt;object_size)</span>检验同样用于调测,其主要是在对象前后设置<span style="-ms-word-wrap: break-word;">RedZone</span>信息,通过检查该信息以捕捉<span style="-ms-word-wrap: break-word;">Buffer</span>溢出的问题;然后设置<span style="-ms-word-wrap: break-word;">kmem_cache</span>的<span style="-ms-word-wrap: break-word;">inuse</span>成员以表示元数据的偏移量,同时表示对象实际使用的大小,也意味着对象与空闲对象指针之间的可能偏移量;接着往下的<span style="-ms-word-wrap: break-word;">if (((flags &amp; (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || s-&gt;ctor))</span>判断在对象释放时是否允许用空闲对象指针覆盖对象的第一个字(使用了<span style="-ms-word-wrap: break-word;">RCU</span>、<span style="-ms-word-wrap: break-word;">poison</span>或者带有构造函数时不允许),如果不允许则重定位空闲对象指针到对象的末尾,并设置<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构的<span style="-ms-word-wrap: break-word;">offset</span>(即对象指针的偏移),同时调整<span style="-ms-word-wrap: break-word;">size</span>为包含空闲对象指针。

同样在开启了<span style="-ms-word-wrap: break-word;">CONFIG_SLUB_DEBUG</span>配置的情况下,如果设置了<span style="-ms-word-wrap: break-word;">SLAB_STORE_USER</span>标识,将会在对象末尾加上两个<span style="-ms-word-wrap: break-word;">track</span>的空间大小,用于记录该对象的使用轨迹信息(分别是申请和释放的信息)。具体会记录什么,可以看一下<span style="-ms-word-wrap: break-word;">track</span>的结构定义;此外如果设置了<span style="-ms-word-wrap: break-word;">SLAB_RED_ZONE</span>,将会新增空白边界,主要是用于捕获内存写越界信息,目的是与其任由其越界破坏了空闲对象指针或者内存申请释放轨迹信息,倒不如捕获内存写越界信息。

再往下则是根据前面统计的<span style="-ms-word-wrap: break-word;">size</span>做对齐操作并更新到<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构中;然后根据调用时的入参<span style="-ms-word-wrap: break-word;">forced_order</span>为<span style="-ms-word-wrap: break-word;">-1</span>,其将通过<span style="-ms-word-wrap: break-word;">calculate_order()</span>计算单<span style="-ms-word-wrap: break-word;">slab</span>的页框阶数,同时得出<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构的<span style="-ms-word-wrap: break-word;">oo</span>、<span style="-ms-word-wrap: break-word;">min</span>、<span style="-ms-word-wrap: break-word;">max</span>等相关信息。

着重分析一下<span style="-ms-word-wrap: break-word;">calculate_order()</span>:


1. 【file:/mm/slub.c】
2. static inline int calculate_order(int size, int reserved)
3. {
4.     int order;
5.     int min_objects;
6.     int fraction;
7.     int max_objects;
8.  
9.     /
10.       Attempt to find best configuration for a slab. This
11.       works by first attempting to generate a layout with
12.       the best configuration and backing off gradually.
13.      
14.       First we reduce the acceptable waste in a slab. Then
15.       we reduce the minimum objects required in a slab.
16.      /
17.     min_objects = slub_min_objects;
18.     if (!min_objects)
19.         min_objects = 4 * (fls(nr_cpu_ids) + 1);
20.     max_objects = order_objects(slub_max_order, size, reserved);
21.     min_objects = min(min_objects, max_objects);
22.  
23.     while (min_objects > 1) {
24.         fraction = 16;
25.         while (fraction >= 4) {
26.             order = slab_order(size, min_objects,
27.                     slub_max_order, fraction, reserved);
28.             if (order <= slub_max_order)
29.                 return order;
30.             fraction /= 2;
31.         }
32.         min_objects--;
33.     }
34.  
35.     /
36.       We were unable to place multiple objects in a slab. Now
37.       lets see if we can place a single object there.
38.      /
39.     order = slab_order(size, 1, slub_max_order, 1, reserved);
40.     if (order <= slub_max_order)
41.         return order;
42.  
43.     /
44.       Doh this slab cannot be placed using slub_max_order.
45.      /
46.     order = slab_order(size, 1, MAX_ORDER, 1, reserved);
47.     if (order < MAX_ORDER)
48.         return order;
49.     return -ENOSYS;
50. }
&nbsp;

其主要是计算每个<span style="-ms-word-wrap: break-word;">slab</span>所需页面的阶数。先判断来自系统参数的最少对象数<span style="-ms-word-wrap: break-word;">slub_min_objects</span>是否已经配置,若未配置则通过处理器数<span style="-ms-word-wrap: break-word;">nr_cpu_ids</span>计算最小对象数;同时通过<span style="-ms-word-wrap: break-word;">order_objects()</span>计算最高阶下,<span style="-ms-word-wrap: break-word;">slab</span>对象最多个数,最后取两者中的较小值作为<span style="-ms-word-wrap: break-word;">min_objects</span>;接着通过两个<span style="-ms-word-wrap: break-word;">while</span>循环,分别对<span style="-ms-word-wrap: break-word;">min_objects</span>及<span style="-ms-word-wrap: break-word;">fraction</span>进行调整,通过<span style="-ms-word-wrap: break-word;">slab_order()</span>计算找出最佳的阶数,其中<span style="-ms-word-wrap: break-word;">fraction</span>用来表示<span style="-ms-word-wrap: break-word;">slab</span>内存未使用率的指标,值越大表示允许的未使用内存越少,也就是说不断调整单个<span style="-ms-word-wrap: break-word;">slab</span>的对象数以及降低碎片指标,由此找到一个最佳值。

如果对象个数及内存未使用率指标都调整到最低了仍得不到最佳阶值时,将尝试一个<span style="-ms-word-wrap: break-word;">slab</span>仅放入单个对象,由此计算出的<span style="-ms-word-wrap: break-word;">order</span>不大于<span style="-ms-word-wrap: break-word;">slub_max_order</span>,则将该值返回;如果<span style="-ms-word-wrap: break-word;">order</span>大于<span style="-ms-word-wrap: break-word;">slub_max_order</span>,则不得不尝试将阶数值调整至最大值<span style="-ms-word-wrap: break-word;">MAX_ORDER</span>,以期得到结果;如果仍未得结果,那么将返回失败。

末尾看一下<span style="-ms-word-wrap: break-word;">slab_order()</span>的实现:


1. 【file:/mm/slub.c】
2. /
3.   Calculate the order of allocation given an slab object size.
4.  
5.   The order of allocation has significant impact on performance and other
6.   system components. Generally order 0 allocations should be preferred since
7.   order 0 does not cause fragmentation in the page allocator. Larger objects
8.   be problematic to put into order 0 slabs because there may be too much
9.   unused space left. We go to a higher order if more than 1/16th of the slab
10.   would be wasted.
11.  
12.   In order to reach satisfactory performance we must ensure that a minimum
13.   number of objects is in one slab. Otherwise we may generate too much
14.   activity on the partial lists which requires taking the list_lock. This is
15.   less a concern for large slabs though which are rarely used.
16.  
17.   slub_max_order specifies the order where we begin to stop considering the
18.   number of objects in a slab as critical. If we reach slub_max_order then
19.   we try to keep the page order as low as possible. So we accept more waste
20.   of space in favor of a small page order.
21.  
22.   Higher order allocations also allow the placement of more objects in a
23.   slab and thereby reduce object handling overhead. If the user has
24.   requested a higher mininum order then we start with that one instead of
25.   the smallest order which will fit the object.
26.  /
27. static inline int slab_order(int size, int min_objects,
28.                 int max_order, int fract_leftover, int reserved)
29. {
30.     int order;
31.     int rem;
32.     int min_order = slub_min_order;
33.  
34.     if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
35.         return get_order(size * MAX_OBJS_PER_PAGE) - 1;
36.  
37.     for (order = max(min_order,
38.                 fls(min_objects * size - 1) - PAGE_SHIFT);
39.             order <= max_order; order++) {
40.  
41.         unsigned long slab_size = PAGE_SIZE << order;
42.  
43.         if (slab_size < min_objects * size + reserved)
44.             continue;
45.  
46.         rem = (slab_size - reserved) % size;
47.  
48.         if (rem <= slab_size / fract_leftover)
49.             break;
50.  
51.     }
52.  
53.     return order;
54. }
&nbsp;

该函数入参<span style="-ms-word-wrap: break-word;">size</span>表示对象大小,<span style="-ms-word-wrap: break-word;">min_objects</span>为最小对象量,<span style="-ms-word-wrap: break-word;">max_order</span>为最高阶,<span style="-ms-word-wrap: break-word;">fract_leftover</span>表示<span style="-ms-word-wrap: break-word;">slab</span>的内存未使用率,而<span style="-ms-word-wrap: break-word;">reserved</span>则表示<span style="-ms-word-wrap: break-word;">slab</span>的保留空间大小。内存页面存储对象个数使用的<span style="-ms-word-wrap: break-word;">objects</span>是<span style="-ms-word-wrap: break-word;">u15</span>的长度,故其最多可存储个数为<span style="-ms-word-wrap: break-word;">MAX_OBJS_PER_PAGE</span>,即<span style="-ms-word-wrap: break-word;">32767</span>。所以如果<span style="-ms-word-wrap: break-word;">order_objects()</span>以<span style="-ms-word-wrap: break-word;">min_order</span>换算内存大小剔除<span style="-ms-word-wrap: break-word;">reserved</span>后,通过<span style="-ms-word-wrap: break-word;">size</span>求得的对象个数大于<span style="-ms-word-wrap: break-word;">MAX_OBJS_PER_PAGE</span>,则改为<span style="-ms-word-wrap: break-word;">MAX_OBJS_PER_PAGE</span>进行求阶。如果对象大小较大时,页面容纳的数量小于<span style="-ms-word-wrap: break-word;">MAX_OBJS_PER_PAGE</span>,那么通过<span style="-ms-word-wrap: break-word;">for</span>循环,调整阶数以期找到一个能够容纳该大小最少对象数量及其保留空间的并且内存的使用率满足条件的阶数。

末了回到<span style="-ms-word-wrap: break-word;">kmem_cache_open()</span>函数中继续查看其剩余的初始化动作。

其会继续初始化<span style="-ms-word-wrap: break-word;">slub</span>结构,<span style="-ms-word-wrap: break-word;">set_min_partial()</span>是用于设置<span style="-ms-word-wrap: break-word;">partial</span>链表的最小值,主要是由于对象的大小越大,则需挂入的<span style="-ms-word-wrap: break-word;">partial</span>链表的页面则容易越多,设置最小值是为了避免过度使用页面分配器造成冲击。再往下的多个<span style="-ms-word-wrap: break-word;">if-else if</span>判断赋值主要是根据对象的大小以及配置的情况,对<span style="-ms-word-wrap: break-word;">cpu_partial</span>进行设置;<span style="-ms-word-wrap: break-word;">cpu_partial</span>表示的是每个<span style="-ms-word-wrap: break-word;">CPU</span>在<span style="-ms-word-wrap: break-word;">partial</span>链表中的最多对象个数,该数据决定了:<span style="-ms-word-wrap: break-word;">1</span>)当使用到了极限时,每个<span style="-ms-word-wrap: break-word;">CPU</span>的<span style="-ms-word-wrap: break-word;">partial slab</span>释放到每个管理节点链表的个数;<span style="-ms-word-wrap: break-word;">2</span>)当使用完每个<span style="-ms-word-wrap: break-word;">CPU</span>的对象数时,<span style="-ms-word-wrap: break-word;">CPU</span>的<span style="-ms-word-wrap: break-word;">partial slab</span>来自每个管理节点的对象数。

kmem_cache_open()函数中接着往下的是<span style="-ms-word-wrap: break-word;">init_kmem_cache_nodes()</span>:


1. 【file:/mm/slub.c】
2. static int init_kmem_cache_nodes(struct kmem_cache s)
3. {
4.     int node;
5.  
6.     for_each_node_state(node, N_NORMAL_MEMORY) {
7.         struct kmem_cache_node n;
8.  
9.         if (slab_state == DOWN) {
10.             early_kmem_cache_node_alloc(node);
11.             continue;
12.         }
13.         n = kmem_cache_alloc_node(kmem_cache_node,
14.                         GFP_KERNEL, node);
15.  
16.         if (!n) {
17.             free_kmem_cache_nodes(s);
18.             return 0;
19.         }
20.  
21.         s->node[node] = n;
22.         init_kmem_cache_node(n);
23.     }
24.     return 1;
25. }
&nbsp;

该函数通过<span style="-ms-word-wrap: break-word;">for_each_node_state</span>遍历每个管理节点,并向<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>全局管理控制块为所遍历的节点申请一个<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>结构空间对象,并将<span style="-ms-word-wrap: break-word;">kmem_cache</span>的<span style="-ms-word-wrap: break-word;">s</span>内的成员<span style="-ms-word-wrap: break-word;">node</span>初始化。

值得注意的是<span style="-ms-word-wrap: break-word;">slab_state</span>如果是<span style="-ms-word-wrap: break-word;">DOWN</span>状态,表示<span style="-ms-word-wrap: break-word;">slub</span>分配器还没有初始化完毕,意味着<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>结构空间对象的<span style="-ms-word-wrap: break-word;">cache</span>还没建立,暂时无法进行对象分配,此时将会通过<span style="-ms-word-wrap: break-word;">early_kmem_cache_node_alloc()</span>创建<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>对象的<span style="-ms-word-wrap: break-word;">slab</span>。这里补充说明一下:这是<span style="-ms-word-wrap: break-word;">slub</span>分配算法初始化时才会进入到的分支,即只有<span style="-ms-word-wrap: break-word;">mm_init()-&gt;kmem_cache_init()-&gt;create_boot_cache(kmem_cache_node, &quot;kmem_cache_node&quot;,sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN)-&gt;</span><span style="-ms-word-wrap: break-word;"> </span>__kmem_cache_create()-&gt;kmem_cache_open()-&gt;init_kmem_cache_nodes()-&gt;early_kmem_cache_node_alloc()这一流程才会进入到<span style="-ms-word-wrap: break-word;">early_kmem_cache_node_alloc()</span>函数执行;执行完毕后,<span style="-ms-word-wrap: break-word;">kmem_cache_init()</span>在调用完<span style="-ms-word-wrap: break-word;">create_boot_cache()</span>及<span style="-ms-word-wrap: break-word;">register_hotmemory_notifier()</span>之后,随即将<span style="-ms-word-wrap: break-word;">slab_state</span>设置为<span style="-ms-word-wrap: break-word;">PARTIAL</span>,表示已经可以分配<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>。

此外,如果已经创建了<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>的<span style="-ms-word-wrap: break-word;">slab</span>,则将会通过<span style="-ms-word-wrap: break-word;">kmem_cache_alloc_node()</span>从初始化好的<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>申请一空闲对象。

现在对前期的缺失进行补全分析,进入<span style="-ms-word-wrap: break-word;">early_kmem_cache_node_alloc()</span>,分析一下其实现:


1. 【file:/mm/slub.c】
2. /
3.   No kmalloc_node yet so do it by hand. We know that this is the first
4.   slab on the node for this slabcache. There are no concurrent accesses
5.   possible.
6.  
7.   Note that this function only works on the kmem_cache_node
8.   when allocating for the kmem_cache_node. This is used for bootstrapping
9.   memory on a fresh node that has no slab structures yet.
10.  /
11. static void early_kmem_cache_node_alloc(int node)
12. {
13.     struct page page;
14.     struct kmem_cache_node n;
15.  
16.     BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
17.  
18.     page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
19.  
20.     BUG_ON(!page);
21.     if (page_to_nid(page) != node) {
22.         printk(KERN_ERR "SLUB: Unable to allocate memory from "
23.                 "node %d\n", node);
24.         printk(KERN_ERR "SLUB: Allocating a useless per node structure "
25.                 "in order to be able to continue\n");
26.     }
27.  
28.     n = page->freelist;
29.     BUG_ON(!n);
30.     page->freelist = get_freepointer(kmem_cache_node, n);
31.     page->inuse = 1;
32.     page->frozen = 0;
33.     kmem_cache_node->node[node] = n;
34. #ifdef CONFIG_SLUB_DEBUG
35.     init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
36.     init_tracking(kmem_cache_node, n);
37. #endif
38.     init_kmem_cache_node(n);
39.     inc_slabs_node(kmem_cache_node, node, page->objects);
40.  
41.     /
42.       No locks need to be taken here as it has just been
43.       initialized and there is no concurrent access.
44.      */
45.     __add_partial(n, page, DEACTIVATE_TO_HEAD);
46. }
&nbsp;

该函数将先通过<span style="-ms-word-wrap: break-word;">new_slab()</span>创建<span style="-ms-word-wrap: break-word;">kmem_cache_node</span>结构空间对象的<span style="-ms-word-wrap: break-word;">slab</span>,如果创建的<span style="-ms-word-wrap: break-word;">slab</span>不在对应的内存节点中,则通过<span style="-ms-word-wrap: break-word;">printk</span>输出调试信息;接着从创建的<span style="-ms-word-wrap: break-word;">slab</span>中取出一个对象,并根据<span style="-ms-word-wrap: break-word;">CONFIG_SLUB_DEBUG</span>配置对对象进行初始化(<span style="-ms-word-wrap: break-word;">init_object()</span>对数据区和<span style="-ms-word-wrap: break-word;">RedZone</span>进行标识,同时<span style="-ms-word-wrap: break-word;">init_tracking()</span>记录轨迹信息),然后<span style="-ms-word-wrap: break-word;">inc_slabs_node()</span>更新统计信息;最后将<span style="-ms-word-wrap: break-word;">slab</span>添加到<span style="-ms-word-wrap: break-word;">partial</span>链表中。

而进一步分析<span style="-ms-word-wrap: break-word;">new_slab()</span>的实现:


1. 【file:/mm/slub.c】
2. static struct page new_slab(struct kmem_cache s, gfp_t flags, int node)
3. {
4.     struct page page;
5.     void start;
6.     void last;
7.     void p;
8.     int order;
9.  
10.     BUG_ON(flags & GFP_SLAB_BUG_MASK);
11.  
12.     page = allocate_slab(s,
13.         flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
14.     if (!page)
15.         goto out;
16.  
17.     order = compound_order(page);
18.     inc_slabs_node(s, page_to_nid(page), page->objects);
19.     memcg_bind_pages(s, order);
20.     page->slab_cache = s;
21.     __SetPageSlab(page);
22.     if (page->pfmemalloc)
23.         SetPageSlabPfmemalloc(page);
24.  
25.     start = page_address(page);
26.  
27.     if (unlikely(s->flags & SLAB_POISON))
28.         memset(start, POISON_INUSE, PAGE_SIZE << order);
29.  
30.     last = start;
31.     for_each_object(p, s, start, page->objects) {
32.         setup_object(s, page, last);
33.         set_freepointer(s, last, p);
34.         last = p;
35.     }
36.     setup_object(s, page, last);
37.     set_freepointer(s, last, NULL);
38.  
39.     page->freelist = start;
40.     page->inuse = page->objects;
41.     page->frozen = 1;
42. out:
43.     return page;
44. }
&nbsp;

&nbsp;&nbsp; 首先通过<span style="-ms-word-wrap: break-word;">allocate_slab()</span>申请一个<span style="-ms-word-wrap: break-word;">slab</span>块,继而通过<span style="-ms-word-wrap: break-word;">compound_order()</span>从该<span style="-ms-word-wrap: break-word;">slab</span>的首个<span style="-ms-word-wrap: break-word;">page</span>结构中获取其占用页面的<span style="-ms-word-wrap: break-word;">order</span>信息,然后<span style="-ms-word-wrap: break-word;">inc_slabs_node()</span>更新内存管理节点的<span style="-ms-word-wrap: break-word;">slab</span>统计信息,而<span style="-ms-word-wrap: break-word;">memcg_bind_pages()</span>则是更新内存<span style="-ms-word-wrap: break-word;">cgroup</span>的页面信息;再接下来<span style="-ms-word-wrap: break-word;">page_address()</span>获取页面的虚拟地址,然后根据<span style="-ms-word-wrap: break-word;">SLAB_POISON</span>标识以确定是否<span style="-ms-word-wrap: break-word;">memset()</span>该<span style="-ms-word-wrap: break-word;">slab</span>的空间;最后则是<span style="-ms-word-wrap: break-word;">for_each_object()</span>遍历每一个对象,通过<span style="-ms-word-wrap: break-word;">setup_object()</span>初始化对象信息以及<span style="-ms-word-wrap: break-word;">set_freepointer()</span>设置空闲对象指针,最终将<span style="-ms-word-wrap: break-word;">slab</span>初始化完毕。

结合初始化信息,可以总结出当创建的<span style="-ms-word-wrap: break-word;">slab</span>中对象在所有配置启用时的对象结构信息如图:

<span style="-ms-word-wrap: break-word;">![](http://blog.chinaunix.net/attachment/201511/20/26859697_1448033535v9UZ.png)</span>

&nbsp;

正如前面文章中对<span style="-ms-word-wrap: break-word;">kmem_cache</span>结构的成员解析,<span style="-ms-word-wrap: break-word;">object_size</span>是<span style="-ms-word-wrap: break-word;">slab</span>对象的实际大小,<span style="-ms-word-wrap: break-word;"> inuse</span>为元数据的偏移量(也表示对象实际使用大小),而<span style="-ms-word-wrap: break-word;">offset</span>为存放空闲对象指针的偏移;<span style="-ms-word-wrap: break-word;">inuse</span>和<span style="-ms-word-wrap: break-word;">offset</span>在图中显示是相等的,但是并非完全如此,<span style="-ms-word-wrap: break-word;">inuse</span>是必然有值的,而<span style="-ms-word-wrap: break-word;">offset</span>则是看情况了;至于结构体中的<span style="-ms-word-wrap: break-word;">size</span>成员表示的则是整个对象的大小。<span style="-ms-word-wrap: break-word;">SLAB_POISON</span>设置项仅是对对象<span style="-ms-word-wrap: break-word;">object_size</span>的大小做标识,而<span style="-ms-word-wrap: break-word;">SLAB_RED_ZONE</span>则是对象与空闲对象指针相距的空间做标识,至于<span style="-ms-word-wrap: break-word;">track</span>信息则是在空闲对象指针之后了。<span style="-ms-word-wrap: break-word;">BTW</span>,在<span style="-ms-word-wrap: break-word;">track</span>之后,根据<span style="-ms-word-wrap: break-word;">SLAB_RED_ZONE</span>的设置,新增了一块<span style="-ms-word-wrap: break-word;">sizeof(void *)</span>大小的空间好像并未被使用,可能代码看得不够细,后续再细细斟酌一番。

&nbsp;&nbsp;&nbsp; 回到代码往下,继续<span style="-ms-word-wrap: break-word;">allocate_slab()</span>函数的实现:


1. 【file:/mm/slub.c】
2. static struct page allocate_slab(struct kmem_cache s, gfp_t flags, int node)
3. {
4.     struct page page;
5.     struct kmem_cache_order_objects oo = s->oo;
6.     gfp_t alloc_gfp;
7.  
8.     flags &= gfp_allowed_mask;
9.  
10.     if (flags & __GFP_WAIT)
11.         local_irq_enable();
12.  
13.     flags |= s->allocflags;
14.  
15.     /
16.       Let the initial higher-order allocation fail under memory pressure
17.       so we fall-back to the minimum order allocation.
18.      /
19.     alloc_gfp = (flags | GFP_NOWARN | GFP_NORETRY) & ~__GFP_NOFAIL;
20.  
21.     page = alloc_slab_page(alloc_gfp, node, oo);
22.     if (unlikely(!page)) {
23.         oo = s->min;
24.         /
25.           Allocation may have failed due to fragmentation.
26.           Try a lower order alloc if possible
27.          /
28.         page = alloc_slab_page(flags, node, oo);
29.  
30.         if (page)
31.             stat(s, ORDER_FALLBACK);
32.     }
33.  
34.     if (kmemcheck_enabled && page
35.         && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
36.         int pages = 1 << oo_order(oo);
37.  
38.         kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
39.  
40.         /
41.           Objects from caches that have a constructor don't get
42.           cleared when they're allocated, so we need to do it here.
43.          */
44.         if (s->ctor)
45.             kmemcheck_mark_uninitialized_pages(page, pages);
46.         else
47.             kmemcheck_mark_unallocated_pages(page, pages);
48.     }
49.  
50.     if (flags & __GFP_WAIT)
51.         local_irq_disable();
52.     if (!page)
53.         return NULL;
54.  
55.     page->objects = oo_objects(oo);
56.     mod_zone_page_state(page_zone(page),
57.         (s->flags & SLAB_RECLAIM_ACCOUNT) ?
58.         NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
59.         1 << oo_order(oo));
60.  
61.     return page;
62. }
&nbsp;

如果申请<span style="-ms-word-wrap: break-word;">slab</span>所需页面时设置了<span style="-ms-word-wrap: break-word;">__GFP_WAIT</span>标志,表示允许等待,则通过<span style="-ms-word-wrap: break-word;">local_irq_enable()</span>将中断使能;接着将尝试使用常规的<span style="-ms-word-wrap: break-word;">s-&gt;oo</span>配置进行<span style="-ms-word-wrap: break-word;">alloc_slab_page()</span>内存页面申请。如果申请失败,则将其调至<span style="-ms-word-wrap: break-word;">s-&gt;min</span>进行降阶再次尝试申请;如果申请成功,同时<span style="-ms-word-wrap: break-word;">kmemcheck</span>调测功能开启(<span style="-ms-word-wrap: break-word;">kmemcheck_enabled</span>为<span style="-ms-word-wrap: break-word;">true</span>)且<span style="-ms-word-wrap: break-word;">kmem_cache</span>的<span style="-ms-word-wrap: break-word;">flags</span>未标识<span style="-ms-word-wrap: break-word;">SLAB_NOTRACK</span>或<span style="-ms-word-wrap: break-word;">DEBUG_DEFAULT_FLAGS</span>,将会进行<span style="-ms-word-wrap: break-word;">kmemcheck</span>内存检测的初始化设置。接着,如果<span style="-ms-word-wrap: break-word;">flags</span>设置了<span style="-ms-word-wrap: break-word;">__GFP_WAIT</span>标识,则通过<span style="-ms-word-wrap: break-word;">local_irq_disable()</span>将中断重新禁用。最后通过<span style="-ms-word-wrap: break-word;">mod_zone_page_state</span>计算更新内存管理区的状态统计。

&nbsp;&nbsp;&nbsp; 其中的<span style="-ms-word-wrap: break-word;">alloc_slab_page()</span>则是通过<span style="-ms-word-wrap: break-word;">Buddy</span>伙伴算法进行内存分配:


1. 【file:/mm/slub.c】
2. /
3.   Slab allocation and freeing
4.  /
5. static inline struct page alloc_slab_page(gfp_t flags, int node,
6.                     struct kmem_cache_order_objects oo)
7. {
8.     int order = oo_order(oo);
9.  
10.     flags |= __GFP_NOTRACK;
11.  
12.     if (node == NUMA_NO_NODE)
13.         return alloc_pages(flags, order);
14.     else
15.         return alloc_pages_exact_node(node, flags, order);
16. }
&nbsp;

伙伴算法就不再做重复讲解了。

回到<span style="-ms-word-wrap: break-word;">kmem_cache_open()</span>函数,在<span style="-ms-word-wrap: break-word;">init_kmem_cache_nodes()</span>之后,如果初始化成功,则将会继而调用<span style="-ms-word-wrap: break-word;">alloc_kmem_cache_cpus()</span>:


1. 【file:/mm/slub.c】
2. static inline int alloc_kmem_cache_cpus(struct kmem_cache s)
3. {
4.     BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
5.             KMALLOC_SHIFT_HIGH sizeof(struct kmem_cache_cpu));
6.  
7.     /
8.       Must align to double word boundary for the double cmpxchg
9.       instructions to work; see __pcpu_double_call_return_bool().
10.      /
11.     s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
12.                      2 sizeof(void ));
13.  
14.     if (!s->cpu_slab)
15.         return 0;
16.  
17.     init_kmem_cache_cpus(s);
18.  
19.     return 1;
20. }
&nbsp;

该函数主要通过<span style="-ms-word-wrap: break-word;">__alloc_percpu()</span>为每个<span style="-ms-word-wrap: break-word;">CPU</span>申请空间,然后通过<span style="-ms-word-wrap: break-word;">init_kmem_cache_cpus()</span>将申请空间初始化至每个<span style="-ms-word-wrap: break-word;">CPU</span>上。


1. 【file:/mm/slub.c】
2. static void init_kmem_cache_cpus(struct kmem_cache *s)
3. {
4.     int cpu;
5.  
6.     for_each_possible_cpu(cpu)
7.         per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
8. }
&nbsp;

至此,<span style="-ms-word-wrap: break-word;">slub</span>算法中的缓冲区创建分析完毕。