redis源码阅读-dict(字典)

¶redis源码阅读-dict(字典)

¶字典结构

¶字典主要结构

字典主要分四个结构：dictEntry,dictht,dict,dictType。

dictEntry，是存放键值对元素的实体结构，其可以组成链表结构。
dictht，是存放由dictEntry组成的数组，形成hash表。
dict,则是存放dictht，形成一个字典。
dictType，是字典需要的动作。
我们可以把dictht中的hash表看作一个数组，数组中的每个元素被称为一个桶，这个桶存放的是一个由dictEntry组成的链表。dict中存放有两张hash表，即新旧两张表，这是用来进行rehash的。

¶rehash

rehash,渐进式迁移，这是redis中用来改变表大小的一种手段。redis中有新旧两张表，旧表存储的是字典数据，一般新表是空的。
当字典需要改变大小时（比如扩展容量、缩小容量等），如果字典存储的数据量太大，一次性完成迁移会耗时很久，而redis处理命令是单线程的，这样会造成阻塞。
而采用渐进式迁移，先为新表分配需要改变的大小，然后将旧表中的数据一个个的迁移进新表，而且这个迁移过程被分为许多步，在迁移过程中可以被中断。
中断之后，还没迁移完成的数据存在旧表中，迁移完成的数据就存放在新表中，数据并不会消失。

¶字典重要函数介绍

¶dictScan函数

主要是其使用的算法很经典，值得研究。dictScan函数为了减少重复遍历元素，其采用 reverse binary iteration迭代器算法，此算法由Pieter Noordhuis创造。
主要原理：

最高位+1,向低位进位。在字典不稳定（遍历期间可能发生扩容或缩小）的情况下，既要遍历到所有没被删除的元素，又要尽可能减少重复遍历，这种做法是比较好的选择。
hash表的扩容和缩小都是以2的幂次进行的,如果使用这种方法，每次hash表改变时，游标的状态都不会变。游标所遍历的节点大部分都会在改变后表的游标的前面，不用再被遍历。 
例如：
    每次加法都是最高位+1,向低位进位
    hash表大小为8（游标3位）时：000 -> 100 -> 010 -> 110 -> 001 -> 101 -> 011 -> 111 -> 000 
    hash表大小为16（游标4位）时：0000 -> 1000 -> 0100 -> 1100 -> 0010 -> 1010 -> 0110 -> 
    1110 -> 0001 -> 1001 -> 0101 -> 1101 -> 0011 -> 1011 -> 
    0111 -> 1111 -> 0000 
    
    当hash表大小为8时，如果游标是110,说明已经遍历了000,100,010;如果此时hash表扩张到大小16，游标在新表中的位置是0110
    看一下0110之前的游标有哪些：0000,1000,0100,1100,0010,1010;恰好，扩张前已经遍历的桶(000,100,010)在新表中对应的桶都在其中。

¶dict.h

/* Hash Tables Implementation.
 *
 * This file implements in-memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto-resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**********************************
	这个文件实现了内存中的hash表，支持insert/del/replace/find/get-random-element操作
	hash表会在需要时自动调整大小（表的大小总是2的幂），冲突通过链表法(chaining)解决
***********************************/
#include <stdint.h>

#ifndef __DICT_H
#define __DICT_H

/* Status codes returned by the dict API: 0 on success, 1 on failure. */
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings... */
/* Evaluates V as a void expression so the compiler sees it as used. */
#define DICT_NOTUSED(V) ((void) V)

/* A single key/value node. Entries that fall into the same bucket are linked
 * together through 'next', forming a singly linked chain. */
typedef struct dictEntry {
    void *key;
    union { /* the value; the members share storage, so only one is meaningful at a time */
        void *val; /* generic pointer value */
        uint64_t u64; /* unsigned 64 bit integer value */
        int64_t s64; /* signed 64 bit integer value */
        double d; /* double precision floating point value */
    } v;
    struct dictEntry *next; /* next entry in the same bucket chain, or NULL */
} dictEntry;

/* Per-dictionary callbacks. The accessor macros in this header treat every
 * member except hashFunction as optional: a NULL keyDup/valDup stores the
 * pointer as-is, a NULL keyCompare falls back to pointer equality, and a NULL
 * destructor frees nothing. */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key); /* hash a key */
    void *(*keyDup)(void *privdata, const void *key); /* duplicate a key */
    void *(*valDup)(void *privdata, const void *obj); /* duplicate a value */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* non-zero when the keys are equal */
    void (*keyDestructor)(void *privdata, void *key); /* release a key */
    void (*valDestructor)(void *privdata, void *obj); /* release a value */
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    /* Bucket array. Each slot points to the first dictEntry of a chain, so a
     * single slot can hold several entries whose hashes map to that index. */
    dictEntry **table; 
    unsigned long size; /* number of buckets in 'table' */
    unsigned long sizemask; /* mask used to turn a hash into a bucket index; dictExpand() sets it to size-1 */
    unsigned long used; /* number of entries currently stored in the table */
} dictht;

/* A complete dictionary: the type callbacks, the caller's private data and
 * the two hash tables used for incremental rehashing. */
typedef struct dict {
    dictType *type; /* callbacks used to hash, copy, compare and free keys/values */
    void *privdata; /* opaque pointer passed to the dup/compare/destructor callbacks */
    /* ht[0] is the table in normal use. ht[1] is only populated while a
     * rehash is in progress: entries are moved from ht[0] into it bucket by
     * bucket, and once ht[0] is empty ht[1] is copied over ht[0] and reset. */
    dictht ht[2]; 
    long rehashidx; /* rehashing not in progress if rehashidx == -1; otherwise the next ht[0] bucket to migrate */
    unsigned long iterators; /* number of iterators currently running (dictNext() counts safe iterators only) */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
 /* Iterator state used to walk the dictEntry nodes of a dictionary one by one. */
 // https://blog.csdn.net/dengzhilong_cpp/article/details/54953911
typedef struct dictIterator {
    dict *d; /* dictionary being iterated */
    long index; /* current bucket index inside the dictht 'table' array; -1 before the first step */
    /* 'table' selects which of d->ht[] is being walked (0 or 1).
     * 'safe' is 1 for a safe iterator and 0 for an unsafe one (see above). */
    int table, safe;
    dictEntry *entry, *nextEntry; /* entry last returned, and its successor saved in advance */
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint; /* dictFingerprint() value taken when an unsafe iterator starts */
} dictIterator;

/* Callback types accepted by dictScan(): one receives an entry, the other a
 * reference to a bucket head pointer. Both also receive the caller's privdata. */
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref);

/* This is the initial size of every hash table */
/* dictResize() also uses it as the lower bound of the size it requests. */
#define DICT_HT_INITIAL_SIZE     4

/* ------------------------------- Macros ------------------------------------*/
/* Release the value of 'entry' through the type's valDestructor, if one is
 * set. Wrapped in do/while(0) so that the macro expands to a single statement
 * and cannot capture an 'else' that follows it. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)

/* Store a pointer value in 'entry'. When the type provides valDup the value
 * is duplicated through it (privdata is passed along); otherwise the pointer
 * is stored as-is. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

/* Store a signed 64 bit integer directly in the entry's value union. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)

/* Store an unsigned 64 bit integer directly in the entry's value union. */
#define dictSetUnsignedIntegerVal(entry, _val_) \
    do { (entry)->v.u64 = (_val_); } while(0)

/* Store a double directly in the entry's value union. */
#define dictSetDoubleVal(entry, _val_) \
    do { (entry)->v.d = (_val_); } while(0)

/* Release the key of 'entry' through the type's keyDestructor, if one is
 * set. Single-statement form, see dictFreeVal(). */
#define dictFreeKey(d, entry) do { \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d)->privdata, (entry)->key); \
} while(0)

/* Store the key in 'entry', duplicating it through keyDup when the type
 * provides one. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        (entry)->key = (_key_); \
} while(0)

/* Compare two keys: use keyCompare when the type provides one, otherwise
 * fall back to pointer equality. */
#define dictCompareKeys(d, key1, key2) \
    (((d)->type->keyCompare) ? \
        (d)->type->keyCompare((d)->privdata, key1, key2) : \
        (key1) == (key2))

/* Hash a key with the type's hashFunction (which must be set). */
#define dictHashKey(d, key) ((d)->type->hashFunction(key))
/* Accessors for the fields of an entry. */
#define dictGetKey(he) ((he)->key)
#define dictGetVal(he) ((he)->v.val)
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
#define dictGetDoubleVal(he) ((he)->v.d)
/* Total number of buckets allocated across both tables. */
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
/* Total number of entries stored across both tables. */
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
/* True while an incremental rehash is in progress. */
#define dictIsRehashing(d) ((d)->rehashidx != -1)

/* API */
/* Allocate and initialise an empty dictionary. */
dict *dictCreate(dictType *type, void *privDataPtr);
/* Create the first table, or start an incremental rehash into a new table
 * sized from 'size'. Returns DICT_OK or DICT_ERR. */
int dictExpand(dict *d, unsigned long size);
/* Add a key/value pair; DICT_ERR if the key already exists. */
int dictAdd(dict *d, void *key, void *val);
/* Add only the key and return the new entry, or NULL (filling *existing when
 * non-NULL) if the key is already present. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
/* Like dictAddRaw() but returns the existing entry instead of NULL. */
dictEntry *dictAddOrFind(dict *d, void *key);
/* Add or overwrite; returns 1 if the key was new, 0 if its value was replaced. */
int dictReplace(dict *d, void *key, void *val);
/* Remove and free the entry for 'key'; DICT_OK if found, DICT_ERR otherwise. */
int dictDelete(dict *d, const void *key);
/* Remove the entry for 'key' without freeing it and return it (NULL if absent). */
dictEntry *dictUnlink(dict *ht, const void *key);
/* Free an entry previously returned by dictUnlink(); NULL is accepted. */
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
/* Free every entry, both tables and the dict itself. */
void dictRelease(dict *d);
/* Look up the entry for 'key'; NULL if absent. */
dictEntry * dictFind(dict *d, const void *key);
/* Look up 'key' and return its pointer value; NULL if absent. */
void *dictFetchValue(dict *d, const void *key);
/* Resize the table to fit the current number of entries. */
int dictResize(dict *d);
/* Create an unsafe / a safe iterator, advance one, and release one. */
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
/* NOTE(review): the definitions of the following four functions are not
 * visible here; see dict.c for their contracts. */
dictEntry *dictGetRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
/* Default hash functions, keyed by the seed set with dictSetHashFunctionSeed(). */
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
/* NOTE(review): definitions not visible here; presumably empty the dict and
 * toggle the global resize switch respectively -- confirm in dict.c. */
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
/* Perform up to 'n' bucket migrations; returns 1 while more remain, else 0. */
int dictRehash(dict *d, int n);
/* Rehash in batches of 100 buckets until done or more than 'ms' milliseconds elapsed. */
int dictRehashMilliseconds(dict *d, int ms);
/* Set / get the 16 byte seed used by the default hash functions. */
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
/* NOTE(review): definitions of the following three functions are not visible
 * here; see dict.c for their contracts. */
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);

/* Hash table types */
/* Ready-made dictType instances; only declared here, their definitions live
 * in another translation unit. */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

#endif /* __DICT_H */

¶dict.c

/* Hash Tables Implementation.
 *
 * This file implements in memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fmacros.h" // 定义特性测试宏(feature test macros)，需要在系统头文件之前包含

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>

#include "dict.h" // 提供定义的字典数据结构和字典需要的函数原型
#include "zmalloc.h" // 提供redis包装的内存分配函数
#ifndef DICT_BENCHMARK_MAIN // 没有定义DICT_BENCHMARK_MAIN(即不是独立编译的基准测试程序)时，使用redis自带的断言
#include "redisassert.h"
#else
#include <assert.h> // 否则使用标准库的断言
#endif

/* Using dictEnableResize() / dictDisableResize() we make possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: a hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
 /* Resize switch: 1 means resizing is allowed, 0 means it is disabled.
  * A bucket is one slot of the table array plus the chain hanging off it;
  * long chains make lookups slow because a chain has to be walked linearly,
  * which is why growth is still forced past the ratio below. */
static int dict_can_resize = 1;
static unsigned int dict_force_resize_ratio = 5; /* elements-per-bucket ratio above which growth is forced */

/* -------------------------- private prototypes ---------------------------- */
/* Helpers private to this file. */

/* Expand the dictionary if needed. */
static int _dictExpandIfNeeded(dict *ht);
/* Compute the real table size for a requested size; presumably rounds up to
 * a power of two (definition not visible here -- confirm). */
static unsigned long _dictNextPower(unsigned long size);
/* Find the bucket index where 'key' should be inserted; dictAddRaw() treats
 * a result of -1 as "key already exists". */
static long _dictKeyIndex(dict *ht, const void *key, uint64_t hash, dictEntry **existing);
/* Initialise a freshly allocated dict. */
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

/* Seed (16 bytes) handed to the default hash functions as their key. */
static uint8_t dict_hash_function_seed[16];

/* Copy 16 bytes from 'seed' into the module-level seed buffer. The caller's
 * buffer is not retained. */
void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(&dict_hash_function_seed[0], seed, sizeof dict_hash_function_seed);
}

/* Return a pointer to the module-level seed buffer (not a copy). */
uint8_t *dictGetHashFunctionSeed(void) {
    return &dict_hash_function_seed[0];
}

/* The default hashing functions delegate to the SipHash implementation
 * in siphash.c; only its prototypes are declared here. */

uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

/* Hash 'len' bytes starting at 'key' with siphash(), keyed by the module seed. */
uint64_t dictGenHashFunction(const void *key, int len) {
    const uint8_t *seed = dict_hash_function_seed;
    return siphash(key, len, seed);
}

/* Same as dictGenHashFunction() but through siphash_nocase(). */
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
    const uint8_t *seed = dict_hash_function_seed;
    return siphash_nocase(buf, len, seed);
}

/* ----------------------------- API implementation ------------------------- */

/* Put a hash table back into its empty state: no bucket array, zero size,
 * zero mask and zero entries. Nothing is freed here; callers release the
 * bucket array themselves before (or instead of) calling this. */
static void _dictReset(dictht *ht)
{
    *ht = (dictht){ .table = NULL, .size = 0, .sizemask = 0, .used = 0 };
}

/* Create a new dictionary.
 * Allocates the dict with zmalloc(), initialises it with the given type
 * callbacks and private data, and returns it. No bucket array is allocated
 * yet. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof *fresh);
    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

/* Initialise an already allocated dictionary.
 * Both tables start empty, no rehash is in progress (rehashidx == -1) and no
 * iterator is counted as running. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    for (int t = 0; t < 2; t++)
        _dictReset(&d->ht[t]);
    d->rehashidx = -1;          /* not rehashing */
    d->iterators = 0;           /* no iterators running */
    d->privdata = privDataPtr;  /* passed back to the type callbacks */
    d->type = type;
    return DICT_OK;
}

/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1.
 *
 * Returns DICT_ERR when resizing is globally disabled or a rehash is already
 * in progress; otherwise returns whatever dictExpand() returns.
 *
 * 'minimal' is unsigned long, the same type as ht[0].used: storing the count
 * in an int would truncate it for tables holding more than INT_MAX entries
 * and make the size passed to dictExpand() meaningless. */
int dictResize(dict *d)
{
    unsigned long minimal;

    /* Resizing must be enabled and no rehash may be running. */
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used; /* entries currently stored in the main table */
    if (minimal < DICT_HT_INITIAL_SIZE) /* never go below the initial size */
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}

/* Expand or create the hash table.
 *
 * Fails with DICT_ERR when a rehash is already running, when 'size' is
 * smaller than the number of entries already stored in ht[0], or when the
 * computed size equals the current one. Otherwise a zeroed bucket array of
 * _dictNextPower(size) pointers is allocated and DICT_OK is returned:
 *  - if ht[0] has no bucket array yet this is the first initialisation, and
 *    the new table simply becomes ht[0];
 *  - otherwise the new table becomes ht[1] and rehashidx is set to 0, which
 *    starts the incremental rehash. */
int dictExpand(dict *d, unsigned long size)
{
    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    unsigned long realsize = _dictNextPower(size);

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Build the new table with every bucket pointer set to NULL. */
    dictht fresh;
    fresh.table = zcalloc(realsize*sizeof(dictEntry*));
    fresh.size = realsize;
    fresh.sizemask = realsize-1;
    fresh.used = 0;

    if (d->ht[0].table != NULL) {
        /* A table is already in use: prepare the second one and start
         * incremental rehashing from bucket 0. */
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        /* First initialisation: not really a rehash, just install the
         * table so that it can accept keys. */
        d->ht[0] = fresh;
    }
    return DICT_OK;
}

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
 /* 'n' is the maximum number of buckets to migrate in this call.
  * The migration is deliberately split into small steps instead of being done
  * in one go: moving a very large table at once would block the caller for a
  * long time. The return value tells the caller whether more steps are
  * needed. No entry is lost while a rehash is in progress: every entry is
  * always in exactly one of the two tables, and lookups check both. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0; /* no rehash in progress: nothing to do */

    /* Migrate one bucket per iteration. */
    while(n-- && d->ht[0].used != 0) { /* stop early once the old table holds no entries */
        dictEntry *de, *nextde; /* current entry and its saved successor */

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx); /* rehashidx must stay inside the old table */
        /* Skip empty buckets, giving up once the empty-visit budget is spent. */
        while(d->ht[0].table[d->rehashidx] == NULL) { /* an empty old bucket has nothing to move */
            d->rehashidx++;
            if (--empty_visits == 0) return 1; /* budget exhausted; more work remains */
        }
        de = d->ht[0].table[d->rehashidx]; /* head of the chain to migrate */
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) { /* walk the chain */
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h]; /* insert the entry at the head of the new bucket */
            d->ht[1].table[h] = de;
            d->ht[0].used--; /* one entry fewer in the old table */
            d->ht[1].used++; /* one entry more in the new table */
            de = nextde; /* continue with the rest of the old chain */
        }
        d->ht[0].table[d->rehashidx] = NULL; /* the old bucket is now empty */
        d->rehashidx++; /* next call resumes from the following bucket */
    }

    /* Check if we already rehashed the whole table... */
    /* If so, free the old bucket array and promote the new table. */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1]; /* the new table becomes the main table */
        _dictReset(&d->ht[1]); /* ht[1] goes back to the empty state */
        d->rehashidx = -1; /* mark rehashing as finished */
        return 0;
    }

    /* More to rehash... */
    return 1;
}

/* Return the current wall-clock time from gettimeofday(), expressed in
 * milliseconds (seconds * 1000 plus microseconds / 1000). */
long long timeInMilliseconds(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    long long whole_ms = (long long)now.tv_sec * 1000;
    return whole_ms + (now.tv_usec / 1000);
}

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds.
 *
 * Runs dictRehash() in batches of 100 buckets, checking the clock after each
 * batch, and stops when the rehash completes or more than 'ms' milliseconds
 * have elapsed. Returns the number of steps accounted for (100 per batch that
 * reported more work remaining). */
int dictRehashMilliseconds(dict *d, int ms) {
    const long long began = timeInMilliseconds();
    int steps = 0;

    for (;;) {
        if (!dictRehash(d,100)) break;   /* rehash finished (or not running) */
        steps += 100;
        if (timeInMilliseconds()-began > ms) break;  /* time budget spent */
    }
    return steps;
}

/* Perform a single rehashing step, but only when no iterator is counted as
 * running on the dictionary (d->iterators == 0). While iterators are active
 * in the middle of a rehash the two tables must not be touched, otherwise
 * some element could be missed or returned twice.
 *
 * Called from the common lookup and update operations so that the table
 * migrates automatically while the dictionary is being used. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;
    dictRehash(d,1);
}

/* Add a key/value pair to the dictionary.
 * The key is inserted first through dictAddRaw(); when that reports the key
 * as already present (NULL) DICT_ERR is returned and nothing changes.
 * Otherwise the value is stored in the new entry and DICT_OK is returned. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *fresh = dictAddRaw(d,key,NULL);
    if (fresh == NULL) return DICT_ERR;

    dictSetVal(d, fresh, val);
    return DICT_OK;
}

/* Low level add or find.
 *
 * Inserts an entry holding only the key and hands the entry back to the
 * caller, who is then free to fill in the value field. This is mainly useful
 * to store non-pointer values, for example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If the key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If the key was added, the new entry is returned.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    /* Contribute one step to a rehash that is in progress. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new element, or -1 if the key already exists. */
    long slot = _dictKeyIndex(d, key, dictHashKey(d,key), existing);
    if (slot == -1) return NULL;

    /* While rehashing, new entries go into the new table (ht[1]). The entry
     * is inserted at the head of the chain, on the assumption that recently
     * added entries are more likely to be accessed soon. */
    dictht *target = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    dictEntry *node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Store the key (duplicated when the type provides keyDup). */
    dictSetKey(d, node, key);
    return node;
}

/* Add or Overwrite.
 * Adds the element, or replaces the value if the key already exists.
 * Returns 1 if the key was added from scratch, 0 if an entry with that key
 * already existed and only its value was updated. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing;

    /* Try to add the key. On success the entry is new and only needs its
     * value; on failure 'existing' points at the entry already stored. */
    dictEntry *added = dictAddRaw(d,key,&existing);
    if (added != NULL) {
        dictSetVal(d, added, val);
        return 1;
    }

    /* Set the new value first and free the old one afterwards. The order
     * matters because the new value may be exactly the same object as the old
     * one: with reference counting you want to increment (set) and then
     * decrement (free), never the reverse. A by-value copy of the entry keeps
     * the old value reachable after it has been overwritten. */
    dictEntry snapshot = *existing;
    dictSetVal(d, existing, val);
    dictFreeVal(d, &snapshot);
    return 0;
}

/* Add or Find.
 * A version of dictAddRaw() that always returns an entry for the given key:
 * the newly added one, or the one that was already stored when the key
 * exists and cannot be added again.
 *
 * See dictAddRaw() for more information. */
dictEntry *dictAddOrFind(dict *d, void *key) {
    dictEntry *existing;
    dictEntry *added = dictAddRaw(d,key,&existing);

    if (added) return added;
    return existing;
}

/* Search and remove an element. This is an helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions. */
  /*************************************************
 dictUnlink() followed by dictFreeUnlinkedEntry() does the same work as this
 function called with nofree == 0, split into two steps.
 dictDelete() is this function called with nofree == 0.
  *************************************************/
 
 
 /* Look up 'key', unlink its entry from the bucket chain and decrement the
  * table's 'used' counter. When 'nofree' is 0 the key, the value and the
  * entry itself are released as well.
  * Returns the unlinked entry, or NULL when the key is not found. Note that
  * when 'nofree' is 0 the returned pointer refers to memory that has already
  * been freed, so it may only be tested against NULL. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    /* Both tables empty: there is nothing to delete. */
    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d); /* contribute one step to a rehash in progress */
    h = dictHashKey(d, key); /* hash of the key being deleted */

    for (table = 0; table <= 1; table++) { /* look in ht[0], then in ht[1] */
        idx = h & d->ht[table].sizemask; /* bucket index in this table */
        he = d->ht[table].table[idx]; /* head of the bucket chain */
        prevHe = NULL;
        while(he) { /* walk the chain looking for the key */
            if (key==he->key || dictCompareKeys(d, key, he->key)) { /* same pointer, or equal per the type's comparison */
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            he = he->next; /* not this entry: move on to the next one */
        }
        if (!dictIsRehashing(d)) break; /* ht[1] is only in use during a rehash */
    }
    return NULL; /* not found */
}

/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. The key, value and entry are freed (nofree == 0). */
int dictDelete(dict *ht, const void *key) {
    if (dictGenericDelete(ht,key,0) == NULL) return DICT_ERR;
    return DICT_OK;
}

/* Remove an element from the table, but without actually releasing
 * the key, value and dictionary entry. The dictionary entry is returned
 * if the element was found (and unlinked from the table), and the user
 * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
 * Otherwise if the key is not found, NULL is returned.
 *
 * This function is useful when we want to remove something from the hash
 * table but want to use its value before actually deleting the entry.
 * Without this function the pattern would require two lookups:
 *
 *  entry = dictFind(dictionary,key);
 *  // Do something with entry
 *  dictDelete(dictionary,key);
 *
 * Thanks to this function it is possible to avoid this, and use
 * instead:
 *
 * entry = dictUnlink(dictionary,key);
 * // Do something with entry
 * dictFreeUnlinkedEntry(dictionary,entry); // <- This does not need to lookup again.
 */
dictEntry *dictUnlink(dict *ht, const void *key) {
    /* nofree == 1: unlink only, the caller owns the entry from now on. */
    dictEntry *unlinked = dictGenericDelete(ht,key,1);
    return unlinked;
}

/* Release an entry previously returned by dictUnlink(): its key and value
 * are freed through the type's destructors (when set) and then the entry
 * itself is freed. Calling this with 'he' == NULL is safe and does nothing. */
void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
    if (he != NULL) {
        dictFreeKey(d, he);
        dictFreeVal(d, he);
        zfree(he);
    }
}

/* Destroy every entry of one hash table and release its bucket array.
 *
 *  d        : dictionary that owns the table (provides the type callbacks
 *             and privdata)
 *  ht       : the table to clear
 *  callback : optional; when non-NULL it is invoked with d->privdata once
 *             every 65536 buckets visited (starting with bucket 0)
 *
 * The table is left in the reset (empty) state. Always returns DICT_OK. */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    /* Stop as soon as every entry has been released, even if buckets remain. */
    for (unsigned long slot = 0; slot < ht->size && ht->used > 0; slot++) {
        if (callback && (slot & 65535) == 0) callback(d->privdata);

        /* Free the whole chain hanging off this bucket (if any). */
        dictEntry *node = ht->table[slot];
        while (node != NULL) {
            dictEntry *following = node->next;  /* saved before node is freed */
            dictFreeKey(d, node);
            dictFreeVal(d, node);
            zfree(node);
            ht->used--;
            node = following;
        }
    }
    /* Free the bucket array and re-initialise the table. */
    zfree(ht->table);
    _dictReset(ht);
    return DICT_OK; /* never fails */
}

/* Clear & Release the dictionary: both hash tables are cleared (no progress
 * callback) and then the dict structure itself is freed. */
void dictRelease(dict *d)
{
    for (int t = 0; t < 2; t++)
        _dictClear(d,&d->ht[t],NULL);
    zfree(d);
}

/* Look up the entry stored under 'key'.
 * Returns the entry, or NULL when the dictionary is empty or the key is not
 * present. A lookup also contributes one step to a rehash in progress, and
 * while rehashing both tables are searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    uint64_t hash = dictHashKey(d, key);
    for (uint64_t t = 0; t <= 1; t++) {
        uint64_t slot = hash & d->ht[t].sizemask;
        dictEntry *node = d->ht[t].table[slot];

        /* Walk the chain; a match is the same pointer or an equal key. */
        for (; node != NULL; node = node->next) {
            if (key==node->key || dictCompareKeys(d, key, node->key))
                return node;
        }
        /* ht[1] only holds entries while a rehash is in progress. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}
/* Look up 'key' and return the pointer value stored with it, or NULL when
 * the key is not present. The result is an untyped pointer that the caller
 * converts to the appropriate type. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d,key);

    if (found == NULL) return NULL;
    return dictGetVal(found);
}

/* A fingerprint is a 64 bit number that represents the state of the dictionary
 * at a given time, it's just a few dict properties xored together.
 * When an unsafe iterator is initialized, we get the dict fingerprint, and check
 * the fingerprint again when the iterator is released.
 * If the two fingerprints are different it means that the user of the iterator
 * performed forbidden operations against the dictionary while iterating. */
 /* The result is a hash of six values: the bucket array address, size and
  * used count of each of the two tables. */
long long dictFingerprint(dict *d) {
    long long integers[6], hash = 0;
    int j;

    integers[0] = (long) d->ht[0].table; /* address of ht[0]'s bucket array, as an integer */
    integers[1] = d->ht[0].size; /* number of buckets in ht[0] */
    integers[2] = d->ht[0].used; /* number of entries in ht[0] */
    integers[3] = (long) d->ht[1].table; /* address of ht[1]'s bucket array, as an integer */
    integers[4] = d->ht[1].size; /* number of buckets in ht[1] */
    integers[5] = d->ht[1].used; /* number of entries in ht[1] */

    /* We hash N integers by summing every successive integer with the integer
     * hashing of the previous sum. Basically:
     *
     * Result = hash(hash(hash(int1)+int2)+int3) ...
     *
     * This way the same set of integers in a different order will (likely) hash
     * to a different number. */
    for (j = 0; j < 6; j++) {
        hash += integers[j];
        /* For the hashing step we use Thomas Wang's 64 bit integer hash. */
        hash = (~hash) + (hash << 21); /* same as (hash << 21) - hash - 1; '<< 21' multiplies by 2^21 */
        hash = hash ^ (hash >> 24); /* XOR with the value shifted right by 24 bits */
        hash = (hash + (hash << 3)) + (hash << 8); /* hash * 265 (1 + 8 + 256) */
        hash = hash ^ (hash >> 14);
        hash = (hash + (hash << 2)) + (hash << 4); /* hash * 21 (1 + 4 + 16) */
        hash = hash ^ (hash >> 28);
        hash = hash + (hash << 31);
    }
    return hash;
}

/* Allocate a new iterator over dictionary 'd' and return it.
 *
 * The iterator starts in its "not yet used" state (table 0, index -1, no
 * current or next entry) and is marked as not safe (safe = 0). The memory
 * comes from zmalloc(); dictReleaseIterator() frees it. */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->d = d;
    it->safe = 0;
    /* Position: before the first bucket of the first table. */
    it->table = 0;
    it->index = -1;
    /* Nothing has been visited yet. */
    it->entry = NULL;
    it->nextEntry = NULL;
    return it;
}

/* Create an iterator exactly like dictGetIterator() does, then flag it as
 * "safe" by setting its 'safe' field to 1. */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *iter = dictGetIterator(d);

    iter->safe = 1;
    return iter;
}

/* Advance the iterator and return the next entry of the dictionary, or NULL
 * once every bucket has been walked.
 *
 * On the very first call (index == -1 and table == 0) a safe iterator
 * increments the dictionary 'iterators' counter, while an unsafe one records
 * the current dictionary fingerprint; dictReleaseIterator() later undoes or
 * verifies that. */
dictEntry *dictNext(dictIterator *iter)
{
    for (;;) {
        if (iter->entry != NULL) {
            /* Still inside a bucket chain: move to the saved successor. */
            iter->entry = iter->nextEntry;
        } else {
            /* Either the first call or the end of a chain: go to the next
             * bucket of the current table. */
            dictht *cur = &iter->d->ht[iter->table];

            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe) {
                    iter->d->iterators++;
                } else {
                    iter->fingerprint = dictFingerprint(iter->d);
                }
            }
            iter->index++;
            if (iter->index >= (long) cur->size) {
                /* Past the last bucket of this table. Continue with the
                 * second table only if we were on the first one and a
                 * rehash is in progress; otherwise the iteration is over. */
                if (!dictIsRehashing(iter->d) || iter->table != 0)
                    return NULL;
                iter->table++;
                iter->index = 0;
                cur = &iter->d->ht[1];
            }
            iter->entry = cur->table[iter->index];
        }

        if (iter->entry != NULL) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
}
/* Release an iterator obtained from dictGetIterator()/dictGetSafeIterator().
 *
 * If the iterator was advanced at least once (it is no longer at table 0,
 * index -1), the bookkeeping done by the first dictNext() call is closed:
 * a safe iterator decrements the dictionary 'iterators' counter, an unsafe
 * one asserts that the dictionary fingerprint did not change. The iterator
 * memory is then freed with zfree(). */
void dictReleaseIterator(dictIterator *iter)
{
    int started = (iter->index != -1 || iter->table != 0);

    if (started) {
        if (iter->safe) {
            iter->d->iterators--;
        } else {
            assert(iter->fingerprint == dictFingerprint(iter->d));
        }
    }
    zfree(iter);
}

/* Return a random entry from the hash table, or NULL when the dictionary is
 * empty. Useful to implement randomized algorithms.
 *
 * A non-empty bucket is picked at random first, then a random element of the
 * chain stored in that bucket. One rehash step is performed up front when a
 * rehash is in progress. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *bucket, *he;
    unsigned long h;
    int listlen, listele;

    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);

    if (!dictIsRehashing(d)) {
        /* Single table: draw indexes in ht[0] until a non-empty bucket. */
        do {
            h = random() & d->ht[0].sizemask;
            bucket = d->ht[0].table[h];
        } while (bucket == NULL);
    } else {
        /* Two tables: draw an index in the virtual range
         * [rehashidx, ht[0].size + ht[1].size). We are sure there are no
         * elements in ht[0] at indexes from 0 to rehashidx-1. Values past
         * the end of ht[0] address ht[1]. */
        do {
            unsigned long span = d->ht[0].size + d->ht[1].size - d->rehashidx;

            h = d->rehashidx + (random() % span);
            if (h >= d->ht[0].size)
                bucket = d->ht[1].table[h - d->ht[0].size];
            else
                bucket = d->ht[0].table[h];
        } while (bucket == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    for (he = bucket; he != NULL; he = he->next) listlen++;

    listele = random() % listlen;
    he = bucket;
    while (listele--) he = he->next;
    return he;
}

/* This function samples the dictionary to return a few keys from random
 * locations.
 *
 * It does not guarantee to return all the keys specified in 'count', nor
 * it does guarantee to return non-duplicated elements, however it will make
 * some effort to do both things.
 *
 * Returned pointers to hash table entries are stored into 'des' that
 * points to an array of dictEntry pointers. The array must have room for
 * at least 'count' elements, that is the argument we pass to the function
 * to tell how many random elements we need.
 *
 * The function returns the number of items stored into 'des', that may
 * be less than 'count' if the hash table has less than 'count' elements
 * inside, or if not enough elements were found in a reasonable amount of
 * steps.
 *
 * Note that this function is not suitable when you need a good distribution
 * of the returned items, but only when you need to "sample" a given number
 * of continuous elements to run some kind of algorithm or to produce
 * statistics. However the function is much faster than dictGetRandomKey()
 * at producing N elements. */
 /* Parameters:
  *   d     : dictionary to sample
  *   des   : output array receiving the sampled entry pointers
  *   count : number of entries requested (clamped to dictSize(d))
  *
  * Sampling starts at a random bucket index and walks forward. Every entry
  * chained in a visited non-empty bucket is copied to 'des' until 'count'
  * entries are stored, so one long chain may supply all of them. While a
  * rehash is in progress both tables are inspected at each index.
  */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    unsigned long j; /* internal hash table id, 0 or 1. */
    unsigned long tables; /* 1 or 2 tables? */
    unsigned long stored = 0, maxsizemask;
    unsigned long maxsteps; /* Budget of outer loop iterations. */

    /* Never ask for more entries than the dictionary holds. */
    if (dictSize(d) < count) count = dictSize(d);
    maxsteps = count*10;

    /* Try to do a rehashing work proportional to 'count'. */
    /* At most 'count' rehash steps; stop early if the rehash completes. */
    for (j = 0; j < count; j++) {
        if (dictIsRehashing(d))
            _dictRehashStep(d);
        else
            break;
    }

    /* Two tables must be sampled if the rehash is still in progress. */
    tables = dictIsRehashing(d) ? 2 : 1;
    maxsizemask = d->ht[0].sizemask;
    /* While rehashing, use the larger of the two table masks. */
    if (tables > 1 && maxsizemask < d->ht[1].sizemask)
        maxsizemask = d->ht[1].sizemask;

    /* Pick a random point inside the larger table. */
    unsigned long i = random() & maxsizemask;
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                /* If 'i' is also outside ht[1], move it to rehashidx and
                 * examine ht[0] there; otherwise just skip ht[0] for this
                 * index. */
                if (i >= d->ht[1].size)
                    i = d->rehashidx;
                else
                    continue;
            }
            if (i >= d->ht[j].size) continue; /* Out of range for this table. */
            dictEntry *he = d->ht[j].table[i];

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                /* Jump only when BOTH conditions hold: at least 5 empty
                 * buckets in a row and more of them than 'count'. */
                if (emptylen >= 5 && emptylen > count) {
                    i = random() & maxsizemask;
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non
                     * empty while iterating. */
                    *des = he;
                    des++;
                    he = he->next;
                    stored++;
                    if (stored == count) return stored;
                }
            }
        }
        /* Next bucket index, wrapping around at the larger table size. */
        i = (i+1) & maxsizemask;
    }
    return stored;
}

/* Function to reverse bits. Algorithm from:
 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
 *
 * Returns 'v' with the order of all its bits reversed (the lowest bit becomes
 * the highest one and so on). The swap width starts at half the word size and
 * is halved on every round, so the word is processed in log2(width) rounds:
 * halves are swapped first, then quarters, ... and finally adjacent bits. */
static unsigned long rev(unsigned long v) {
    unsigned long width = 8 * sizeof(v); /* bit size; must be power of 2 */
    unsigned long lowmask = ~0UL;        /* starts with every bit set */

    for (width >>= 1; width > 0; width >>= 1) {
        /* Keep only the low 'width' bits of every 2*width-bit group. */
        lowmask ^= (lowmask << width);
        /* Swap the two halves of each group. */
        v = ((v >> width) & lowmask) | ((v << width) & ~lowmask);
    }
    return v;
}

/* dictScan() is used to iterate over the elements of a dictionary.
 *
 * Iterating works the following way:
 *
 * 1) Initially you call the function using a cursor (v) value of 0.
 * 2) The function performs one step of the iteration, and returns the
 *    new cursor value you must use in the next call.
 * 3) When the returned cursor is 0, the iteration is complete.
 *
 * The function guarantees all elements present in the
 * dictionary get returned between the start and end of the iteration.
 * However it is possible some elements get returned multiple times.
 *
 * For every element returned, the callback argument 'fn' is
 * called with 'privdata' as first argument and the dictionary entry
 * 'de' as second argument.
 *
 * HOW IT WORKS.
 *
 * The iteration algorithm was designed by Pieter Noordhuis.
 * The main idea is to increment a cursor starting from the higher order
 * bits. That is, instead of incrementing the cursor normally, the bits
 * of the cursor are reversed, then the cursor is incremented, and finally
 * the bits are reversed again.
 *
 * This strategy is needed because the hash table may be resized between
 * iteration calls.
 *
 * dict.c hash tables are always power of two in size, and they
 * use chaining, so the position of an element in a given table is given
 * by computing the bitwise AND between Hash(key) and SIZE-1
 * (where SIZE-1 is always the mask that is equivalent to taking the rest
 *  of the division between the Hash of the key and SIZE).
 *
 * For example if the current hash table size is 16, the mask is
 * (in binary) 1111. The position of a key in the hash table will always be
 * the last four bits of the hash output, and so forth.
 *
 * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
 *
 * If the hash table grows, elements can go anywhere in one multiple of
 * the old bucket: for example let's say we already iterated with
 * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
 *
 * If the hash table will be resized to 64 elements, then the new mask will
 * be 111111. The new buckets you obtain by substituting in ??1100
 * with either 0 or 1 can be targeted only by keys we already visited
 * when scanning the bucket 1100 in the smaller hash table.
 *
 * By iterating the higher bits first, because of the inverted counter, the
 * cursor does not need to restart if the table size gets bigger. It will
 * continue iterating using cursors without '1100' at the end, and also
 * without any other combination of the final 4 bits already explored.
 *
 * Similarly when the table size shrinks over time, for example going from
 * 16 to 8, if a combination of the lower three bits (the mask for size 8
 * is 111) were already completely explored, it would not be visited again
 * because we are sure we tried, for example, both 0111 and 1111 (all the
 * variations of the higher bit) so we don't need to test it again.
 *
 * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
 *
 * Yes, this is true, but we always iterate the smaller table first, then
 * we test all the expansions of the current cursor into the larger
 * table. For example if the current cursor is 101 and we also have a
 * larger table of size 16, we also test (0)101 and (1)101 inside the larger
 * table. This reduces the problem back to having only one table, where
 * the larger one, if it exists, is just an expansion of the smaller one.
 *
 * LIMITATIONS
 *
 * This iterator is completely stateless, and this is a huge advantage,
 * including no additional memory used.
 *
 * The disadvantages resulting from this design are:
 *
 * 1) It is possible we return elements more than once. However this is usually
 *    easy to deal with in the application level.
 * 2) The iterator must return multiple elements per call, as it needs to always
 *    return all the keys chained in a given bucket, and all the expansions, so
 *    we are sure we don't miss keys moving during rehashing.
 * 3) The reverse cursor is somewhat hard to understand at first, but this
 *    comment is supposed to help.
 */
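/*
    * Notes on the cursor order used by dictScan().
    * A clear write-up: https://blog.csdn.net/gqtcgq/article/details/50533336
    * Explanation from the pull request: https://github.com/antirez/redis/pull/579#issuecomment-16871583
    *
    * The cursor is incremented starting from its highest bit, with the carry
    * moving toward the lower bits. While the dictionary may change between
    * calls, this still visits every element that was not deleted and keeps
    * the number of elements visited more than once small.
    * Hash tables always grow and shrink by powers of two, so with this order
    * the cursor value stays valid when the table size changes: most buckets
    * already visited come before the cursor in the resized table too and do
    * not need to be visited again.
    *
    * Example (every step adds 1 to the highest bit and carries downward):
    * table size 8 (3-bit cursor): 000 -> 100 -> 010 -> 110 -> 001 -> 101 -> 011 -> 111 -> 000
    *
    * table size 16 (4-bit cursor): 0000 -> 1000 -> 0100 -> 1100 -> 0010 -> 1010 -> 0110 ->
    * 1110 -> 0001 -> 1001 -> 0101 -> 1101 -> 0011 -> 1011 ->
    * 0111 -> 1111 -> 0000
    *
    * Suppose the table has size 8 and the cursor is 110: buckets 000, 100 and 010
    * were already visited. If the table now grows to size 16, the cursor maps to 0110.
    * The cursors that come before 0110 in the 16-bucket order are
    * 0000, 1000, 0100, 1100, 0010, 1010: exactly the expansions of the buckets
    * visited before the resize.
    *
 */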
/* Perform one step of a cursor based scan over dictionary 'd'.
 *
 *   v        : cursor, 0 on the first call
 *   fn       : called as fn(privdata, entry) for every entry emitted
 *   bucketfn : optional; when not NULL it is called with the address of each
 *              bucket slot before the entries of that bucket are emitted
 *   privdata : opaque pointer handed to both callbacks
 *
 * Returns the cursor for the next call; 0 is returned when the dictionary is
 * empty or when the scan wrapped around. */
unsigned long dictScan(dict *d,
                       unsigned long v,
                       dictScanFunction *fn,
                       dictScanBucketFunction* bucketfn,
                       void *privdata)
{
    dictht *small, *big;
    const dictEntry *cur, *following;
    unsigned long smallmask, bigmask;

    if (dictSize(d) == 0) return 0;

    if (dictIsRehashing(d)) {
        /* Two tables exist: make sure 'small' is the smaller one and 'big'
         * the bigger one. */
        if (d->ht[0].size > d->ht[1].size) {
            small = &d->ht[1];
            big = &d->ht[0];
        } else {
            small = &d->ht[0];
            big = &d->ht[1];
        }
        smallmask = small->sizemask;
        bigmask = big->sizemask;

        /* Emit entries at cursor in the smaller table. The successor is
         * read before the callback runs. */
        if (bucketfn) bucketfn(privdata, &small->table[v & smallmask]);
        for (cur = small->table[v & smallmask]; cur != NULL; cur = following) {
            following = cur->next;
            fn(privdata, cur);
        }

        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            if (bucketfn) bucketfn(privdata, &big->table[v & bigmask]);
            for (cur = big->table[v & bigmask]; cur != NULL; cur = following) {
                following = cur->next;
                fn(privdata, cur);
            }

            /* Increment the reverse cursor not covered by the smaller mask:
             * set the unmasked bits, then add one to the bit-reversed
             * cursor and reverse it back. */
            v |= ~bigmask;
            v = rev(rev(v) + 1);

            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (smallmask ^ bigmask));
    } else {
        /* No rehash in progress: only ht[0] holds data. */
        small = &(d->ht[0]);
        smallmask = small->sizemask;

        /* Emit entries at cursor */
        if (bucketfn) bucketfn(privdata, &small->table[v & smallmask]);
        for (cur = small->table[v & smallmask]; cur != NULL; cur = following) {
            following = cur->next;
            fn(privdata, cur);
        }

        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~smallmask;

        /* Increment the reverse cursor: reverse the bits, add one (which
         * now acts on what was the highest bit), reverse again. */
        v = rev(rev(v) + 1);
    }

    return v;
}

/* ------------------------- private functions ------------------------------ */

/* Expand the hash table if needed.
 *
 * Returns DICT_OK when nothing has to be done, otherwise the result of
 * dictExpand(). */
static int _dictExpandIfNeeded(dict *d)
{
    dictht *ht0 = &d->ht[0];

    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (ht0->size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Fewer elements than buckets: below the 1:1 ratio, nothing to do. */
    if (ht0->used < ht0->size) return DICT_OK;

    /* At or above the 1:1 ratio. If resizing is globally disabled we still
     * grow, but only once the elements/buckets ratio is over the "safe"
     * threshold dict_force_resize_ratio. */
    if (!dict_can_resize &&
        !(ht0->used/ht0->size > dict_force_resize_ratio))
    {
        return DICT_OK;
    }

    /* Ask for room for twice the current number of elements. */
    return dictExpand(d, ht0->used*2);
}

/* Our hash table capability is a power of two.
 *
 * Starting from DICT_HT_INITIAL_SIZE and doubling, return the first value
 * that is greater than or equal to 'size'. A 'size' of LONG_MAX or more
 * yields LONG_MAX + 1 (as an unsigned long). */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long cap;

    if (size >= LONG_MAX) return LONG_MAX + 1LU;

    cap = DICT_HT_INITIAL_SIZE;
    while (cap < size)
        cap *= 2;
    return cap;
}

/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned
 * and the optional output parameter may be filled.
 * -1 is also returned when the table expansion attempted here fails.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long idx, table;
    dictEntry *he;

    /* The output parameter is optional: reset it only when provided. */
    if (existing) *existing = NULL;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR) return -1;

    table = 0;
    do {
        /* Bucket index: hash masked with the size mask of this table. */
        idx = hash & d->ht[table].sizemask;

        /* Search if this slot does not already contain the given key */
        for (he = d->ht[table].table[idx]; he != NULL; he = he->next) {
            if (key != he->key && !dictCompareKeys(d, key, he->key))
                continue;
            /* Same key found: report it, no free slot is returned. */
            if (existing) *existing = he;
            return -1;
        }
        table++;
        /* The second table is inspected only during a rehash. */
    } while (table <= 1 && dictIsRehashing(d));

    /* No duplicate found: idx belongs to the last table inspected. */
    return idx;
}

/* Empty the dictionary: both hash tables are cleared through _dictClear()
 * (which receives 'callback'), then the rehash index is reset to -1 and the
 * iterators counter to 0. */
void dictEmpty(dict *d, void(callback)(void*)) {
    int t;

    for (t = 0; t <= 1; t++)
        _dictClear(d,&d->ht[t],callback);
    d->rehashidx = -1;
    d->iterators = 0;
}

/* Allow hash table resizing: sets the global flag dict_can_resize, which
 * _dictExpandIfNeeded() consults, to 1. */
void dictEnableResize(void)
{
    dict_can_resize = 1;
}

/* Discourage hash table resizing: sets the global flag dict_can_resize to 0.
 * _dictExpandIfNeeded() then grows a table only when the elements/buckets
 * ratio exceeds dict_force_resize_ratio. */
void dictDisableResize(void)
{
    dict_can_resize = 0;
}

/* Return the hash of 'key' as computed by dictHashKey() for dictionary 'd'. */
uint64_t dictGetHash(dict *d, const void *key) {
    uint64_t h = dictHashKey(d, key);

    return h;
}

/* Finds the dictEntry reference by using pointer and pre-calculated hash.
 * oldkey is a dead pointer and should not be accessed.
 * the hash value should be provided using dictGetHash.
 * no string / key comparison is performed.
 * return value is the reference to the dictEntry if found, or NULL if not found.
 *
 * The returned reference is the address of the pointer that links to the
 * entry: either the bucket slot itself or the 'next' field of the previous
 * entry in the chain. Only pointer identity with 'oldptr' is tested. */
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
    dictEntry **ref;
    unsigned long table;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */

    for (table = 0; table <= 1; table++) {
        /* Start from the bucket slot selected by the hash in this table. */
        ref = &d->ht[table].table[hash & d->ht[table].sizemask];
        /* Walk the chain through the link pointers themselves. */
        for (; *ref != NULL; ref = &(*ref)->next) {
            if ((*ref)->key == oldptr) return ref;
        }
        /* The second table only matters while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}

/* ------------------------------- Debugging ---------------------------------*/

#define DICT_STATS_VECTLEN 50
/* Write human readable statistics about hash table 'ht' into 'buf'.
 *
 *   buf, bufsize : destination buffer and its size in bytes
 *   ht           : table to describe
 *   tableid      : 0 is labelled "main hash table", any other value
 *                  "rehashing target"
 *
 * The output is always NUL terminated when bufsize > 0 and is truncated if
 * the buffer is too small. Unlike snprintf(), the return value is the number
 * of characters actually stored in 'buf' (terminator excluded), so it is
 * always smaller than 'bufsize'; 0 is returned when bufsize is 0. */
size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) {
    unsigned long i, slots = 0, chainlen, maxchainlen = 0;
    unsigned long totchainlen = 0;
    unsigned long clvector[DICT_STATS_VECTLEN]; /* clvector[n]: buckets with chain length n;
                                                 * the last slot collects every longer chain. */
    size_t l = 0;
    int n;

    /* Nothing can be stored, and calling strlen() on 'buf' would be unsafe. */
    if (bufsize == 0) return 0;

    if (ht->used == 0) {
        n = snprintf(buf,bufsize,
            "No stats available for empty dictionaries\n");
        if (n < 0) buf[0] = '\0';
        buf[bufsize-1] = '\0';
        /* Report what was really written, not snprintf()'s would-be length. */
        return strlen(buf);
    }

    /* Compute stats. */
    for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
    for (i = 0; i < ht->size; i++) {
        dictEntry *he;

        if (ht->table[i] == NULL) {
            clvector[0]++;
            continue;
        }
        slots++;
        /* For each hash entry on this slot... */
        chainlen = 0;
        he = ht->table[i];
        while(he) {
            chainlen++;
            he = he->next;
        }
        clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
        if (chainlen > maxchainlen) maxchainlen = chainlen;
        totchainlen += chainlen;
    }

    /* Generate human readable stats. 'slots' is at least 1 here because the
     * table holds at least one entry. The counters are unsigned, so they are
     * printed with %lu. */
    n = snprintf(buf,bufsize,
        "Hash table %d stats (%s):\n"
        " table size: %lu\n"
        " number of elements: %lu\n"
        " different slots: %lu\n"
        " max chain length: %lu\n"
        " avg chain length (counted): %.02f\n"
        " avg chain length (computed): %.02f\n"
        " Chain length distribution:\n",
        tableid, (tableid == 0) ? "main hash table" : "rehashing target",
        (unsigned long)ht->size, (unsigned long)ht->used, slots, maxchainlen,
        (float)totchainlen/slots, (float)ht->used/slots);
    if (n < 0) {
        /* Formatting failed: return an empty string rather than garbage. */
        buf[0] = '\0';
        return 0;
    }
    l = (size_t)n; /* May exceed bufsize when the output was truncated. */

    /* Include the last slot too: it is the ">= N" overflow bucket. */
    for (i = 0; i < DICT_STATS_VECTLEN; i++) {
        if (clvector[i] == 0) continue;
        if (l >= bufsize) break; /* Buffer already full. */
        n = snprintf(buf+l,bufsize-l,
            "   %s%lu: %lu (%.02f%%)\n",
            (i == DICT_STATS_VECTLEN-1)?">= ":"",
            i, clvector[i], ((float)clvector[i]/ht->size)*100);
        if (n < 0) {
            buf[l] = '\0';
            break;
        }
        l += (size_t)n;
    }

    /* Unlike snprintf(), return the number of characters actually written. */
    buf[bufsize-1] = '\0';
    return strlen(buf);
}

/* Write human readable statistics about dictionary 'd' into 'buf' (at most
 * 'bufsize' bytes, terminator included). The stats of ht[0] come first; the
 * stats of ht[1] follow while a rehash is in progress and there is still
 * room. Nothing is written when bufsize is 0; otherwise the buffer is always
 * NUL terminated. */
void dictGetStats(char *buf, size_t bufsize, dict *d) {
    size_t l;

    if (bufsize == 0) return;

    l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0);
    /* Only append when the first report left space. Comparing before
     * subtracting matters: if the helper reports a length >= bufsize,
     * 'bufsize - l' would wrap around to a huge value and the second call
     * would write past the end of the buffer. */
    if (l < bufsize && dictIsRehashing(d)) {
        _dictGetStatsHt(buf+l,bufsize-l,&d->ht[1],1);
    }
    /* Make sure there is a NULL term at the end. */
    buf[bufsize-1] = '\0';
}

/* ------------------------------- Benchmark ---------------------------------*/

#ifdef DICT_BENCHMARK_MAIN

#include "sds.h"

/* Benchmark hash callback: hashes the bytes of 'key' with
 * dictGenHashFunction(), using sdslen() to obtain the key length. */
uint64_t hashCallback(const void *key) {
    unsigned char *bytes = (unsigned char*)key;

    return dictGenHashFunction(bytes, sdslen((char*)key));
}

/* Benchmark key comparison callback: returns 1 when the two keys have the
 * same sdslen() and identical bytes, 0 otherwise. 'privdata' is unused. */
int compareCallback(void *privdata, const void *key1, const void *key2) {
    int len1 = sdslen((sds)key1);
    int len2 = sdslen((sds)key2);
    DICT_NOTUSED(privdata);

    /* Different lengths can never match; memcmp() runs only on equal ones. */
    return (len1 == len2) && (memcmp(key1, key2, len1) == 0);
}

/* Benchmark destructor callback: releases 'val' with sdsfree().
 * 'privdata' is unused. */
void freeCallback(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);
    sdsfree(val);
}

/* Callback table handed to dictCreate() by the benchmark below.
 * NOTE(review): the initializer is positional and the dictType field order
 * is declared elsewhere (dict.h); the slot meanings noted here are
 * presumed from the callback names and should be confirmed there. */
dictType BenchmarkDictType = {
    hashCallback,    /* presumably the key hash function */
    NULL,
    NULL,
    compareCallback, /* presumably the key comparison function */
    freeCallback,    /* presumably a destructor - TODO confirm key vs value */
    NULL
};

/* Timing helpers for the benchmark. Both macros expect these variables in the
 * calling scope: 'start' and 'elapsed' (printed with %lld) and 'count'
 * (printed with %ld). 'msg' must be a string literal because it is
 * concatenated with the format string.
 *
 * The do { ... } while(0) wrapper deliberately has NO trailing semicolon: the
 * caller supplies it, so the macro expands to exactly one statement and can
 * be used safely inside if/else. */
#define start_benchmark() start = timeInMilliseconds()
#define end_benchmark(msg) do { \
    elapsed = timeInMilliseconds()-start; \
    printf(msg ": %ld items in %lld ms\n", count, elapsed); \
} while(0)

/* dict-benchmark [count]
 *
 * Runs a series of timed phases against a dictionary of 'count' keys
 * (default 5000000 when no single argument is given): insertion, two linear
 * lookup passes, random lookups, lookups of missing keys, and finally a
 * delete-then-add pass. Keys are the decimal strings of 0..count-1 built
 * with sdsfromlonglong().
 *
 * The locals 'start', 'elapsed' and 'count' are referenced by name from the
 * start_benchmark()/end_benchmark() macros. */
int main(int argc, char **argv) {
    long long start, elapsed;
    long count = (argc == 2) ? strtol(argv[1],NULL,10) : 5000000;
    dict *bench = dictCreate(&BenchmarkDictType,NULL);
    long n;

    /* Phase 1: insert every key; the value is the key's number. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        int rc = dictAdd(bench,sdsfromlonglong(n),(void*)n);
        assert(rc == DICT_OK);
    }
    end_benchmark("Inserting");
    assert((long)dictSize(bench) == count);

    /* Wait for rehashing. */
    while (dictIsRehashing(bench)) {
        dictRehashMilliseconds(bench,100);
    }

    /* Phase 2: look up every key in insertion order. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        sds probe = sdsfromlonglong(n);
        dictEntry *found = dictFind(bench,probe);
        assert(found != NULL);
        sdsfree(probe);
    }
    end_benchmark("Linear access of existing elements");

    /* Phase 3: same linear pass a second time. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        sds probe = sdsfromlonglong(n);
        dictEntry *found = dictFind(bench,probe);
        assert(found != NULL);
        sdsfree(probe);
    }
    end_benchmark("Linear access of existing elements (2nd round)");

    /* Phase 4: look up randomly chosen existing keys. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        sds probe = sdsfromlonglong(rand() % count);
        dictEntry *found = dictFind(bench,probe);
        assert(found != NULL);
        sdsfree(probe);
    }
    end_benchmark("Random access of existing elements");

    /* Phase 5: overwrite the first character with 'X' so that the key
     * cannot be one of the numeric keys inserted above. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        sds probe = sdsfromlonglong(rand() % count);
        probe[0] = 'X';
        dictEntry *found = dictFind(bench,probe);
        assert(found == NULL);
        sdsfree(probe);
    }
    end_benchmark("Accessing missing");

    /* Phase 6: delete each key, then add a modified copy of it. The string
     * passed to dictAdd() is not freed here. */
    start_benchmark();
    for (n = 0; n < count; n++) {
        sds probe = sdsfromlonglong(n);
        int rc = dictDelete(bench,probe);
        assert(rc == DICT_OK);
        probe[0] += 17; /* Change first number to letter. */
        rc = dictAdd(bench,probe,(void*)n);
        assert(rc == DICT_OK);
    }
    end_benchmark("Removing and adding");
}
#endif