Redis源码分析–RDB实现源码阅读
重要说明,在看这篇文章之前,最好先通过剖析Redis RDB文件 了解RDB文件的结构;
RDB相关源码在rdb.c中;通过saveCommand(redisClient *c) 和bgsaveCommand(redisClient *c) 两个方法可知,RDB持久化业务逻辑在rdbSave(server.rdb_filename)和rdbSaveBackground(server.rdb_filename)这两个方法中;一个通过执行"save"触发,另一个通过执行"bgsave"或者save seconds changes条件满足时(在redis.c的serverCron中)触发:
redis.c里serverCron中通过调用rdbSaveBackground(server.rdb_filename)触发bgsave的部分代码:
/* Excerpt from serverCron: start a background save when the number of
 * changes (server.dirty) has reached sp->changes and more than sp->seconds
 * have passed since the last save, provided that either more than
 * REDIS_BGSAVE_RETRY_DELAY has elapsed since the last bgsave attempt or
 * the last bgsave finished with REDIS_OK.
 * NOTE(review): 'sp' is presumably one configured "save <seconds> <changes>"
 * rule and the 'break' leaves a loop over those rules -- the loop itself is
 * not visible in this excerpt. */
if (server.dirty >= sp->changes &&
server.unixtime-server.lastsave > sp->seconds &&
(server.unixtime-server.lastbgsave_try >
REDIS_BGSAVE_RETRY_DELAY ||
server.lastbgsave_status == REDIS_OK))
{
redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
rdbSaveBackground(server.rdb_filename);
break;
}
通过阅读rdbSaveBackground(char *filename)的源码可知,其最终的实现还是调用rdbSave(char *filename),只不过是通过fork()出的子进程来执行罢了,所以bgsave和save的实现是殊途同归:
/* Fork a child process that writes the RDB file by calling
 * rdbSave(filename).
 *
 * Returns REDIS_ERR when an RDB child is already recorded in
 * server.rdb_child_pid or when fork() fails; returns REDIS_OK in the parent
 * once the child has been started. The child leaves through
 * exitFromChild() with status 0 if rdbSave() succeeded and 1 otherwise. */
int rdbSaveBackground(char *filename) {
    pid_t childpid;
    long long start;

    /* rdb_child_pid differs from -1 while an RDB child is already recorded:
     * refuse to start a second one. */
    if (server.rdb_child_pid != -1) return REDIS_ERR;

    server.dirty_before_bgsave = server.dirty;
    server.lastbgsave_try = time(NULL);

    /* Timestamp taken right before fork() so the parent can measure how
     * long the fork took. */
    start = ustime();

    if ((childpid = fork()) == 0) {
        int retval;

        /* Child: fork() returned 0, so this is the new process. BGSAVE is
         * simply rdbSave() -- the implementation of SAVE -- executed here. */
        closeListeningSockets(0);
        redisSetProcTitle("redis-rdb-bgsave");
        retval = rdbSave(filename);
        if (retval == REDIS_OK) {
            size_t private_dirty = zmalloc_get_private_dirty();

            if (private_dirty) {
                /* Report the private dirty memory of the child, in MB. */
                redisLog(REDIS_NOTICE,
                    "RDB: %zu MB of memory used by copy-on-write",
                    private_dirty/(1024*1024));
            }
        }
        exitFromChild((retval == REDIS_OK) ? 0 : 1);
    } else {
        /* Parent */

        /* Capture errno right away: the calls below run before the error
         * is logged and could overwrite the value set by a failed fork(). */
        int fork_errno = errno;

        /* Record fork statistics: the elapsed time in microseconds, and the
         * fork rate in GB per second (used memory in bytes divided by
         * 1024^3, scaled by 1000000 because the time is in microseconds). */
        server.stat_fork_time = ustime()-start;
        server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
        latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);

        /* fork() failed: remember the failure and report it. */
        if (childpid == -1) {
            server.lastbgsave_status = REDIS_ERR;
            redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
                strerror(fork_errno));
            return REDIS_ERR;
        }
        redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);

        /* Record when the background save started (current time) and which
         * child process is performing it. */
        server.rdb_save_time_start = time(NULL);
        server.rdb_child_pid = childpid;
        server.rdb_child_type = REDIS_RDB_CHILD_TYPE_DISK;
        updateDictResizePolicy();
        return REDIS_OK;
    }
    return REDIS_OK; /* unreached */
}
RDB持久化实现:
/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success.
 *
 * The dump is first written to a temporary file named temp-<pid>.rdb,
 * flushed and synced, and only then renamed to 'filename', so the final
 * file is replaced only when the generated dump is complete. On any
 * failure the temporary file is removed. On success server.dirty,
 * server.lastsave and server.lastbgsave_status are updated. */
int rdbSave(char *filename) {
    char tmpfile[256];
    FILE *fp;
    rio rdb;
    /* Initialized so that errno never receives an indeterminate value if
     * rdbSaveRio() fails through a path that does not set '*error'. */
    int error = 0;

    /* The temporary file name is temp-<pid>.rdb */
    snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) getpid());
    fp = fopen(tmpfile,"w");
    if (!fp) {
        redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",
            strerror(errno));
        return REDIS_ERR;
    }

    rioInitWithFile(&rdb,fp);
    /* Core of the RDB persistence: serialize the dataset to the stream. */
    if (rdbSaveRio(&rdb,&error) == REDIS_ERR) {
        errno = error;
        goto werr;
    }

    /* Make sure data will not remain on the OS's output buffers */
    if (fflush(fp) == EOF) goto werr;
    if (fsync(fileno(fp)) == -1) goto werr;
    if (fclose(fp) == EOF) {
        /* The stream is gone even when fclose() fails: clear the pointer so
         * the error path does not close it a second time. */
        fp = NULL;
        goto werr;
    }
    fp = NULL;

    /* Use RENAME to make sure the DB file is changed atomically only
     * if the generate DB file is ok. */
    if (rename(tmpfile,filename) == -1) {
        redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
        unlink(tmpfile);
        return REDIS_ERR;
    }

    redisLog(REDIS_NOTICE,"DB saved on disk");
    server.dirty = 0;
    server.lastsave = time(NULL);
    server.lastbgsave_status = REDIS_OK;
    return REDIS_OK;

werr:
    /* Log first: the cleanup calls below may overwrite errno. */
    redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
    if (fp) fclose(fp);
    unlink(tmpfile);
    return REDIS_ERR;
}
rdbSaveRio–RDB持久化实现的核心代码–根据RDB文件协议将所有redis中的key-value写入rdb文件中:
/* Write a dump of every database, in RDB format, to the Redis I/O channel
 * 'rdb'. Returns REDIS_OK on success. On failure REDIS_ERR is returned and
 * the output may be partially or entirely missing because of I/O errors.
 *
 * When REDIS_ERR is returned because of a write error and 'error' is not
 * NULL, the integer it points to receives the value of errno observed just
 * after the failure. */
int rdbSaveRio(rio *rdb, int *error) {
    dictIterator *di = NULL;
    char magic[10];
    long long now = mstime();
    uint64_t cksum;
    int dbid;

    if (server.rdb_checksum) {
        rdb->update_cksum = rioGenericUpdateChecksum;
    }

    /* The file starts with the magic: the string "REDIS" followed by the
     * 4-digit RDB version, 9 bytes in total. */
    snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
    if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;

    /* Dump the databases one after the other. */
    for (dbid = 0; dbid < server.dbnum; dbid++) {
        redisDb *db = &server.db[dbid];
        dict *keyspace = db->dict;
        dictEntry *entry;

        /* Nothing is written for a database that holds no keys. */
        if (dictSize(keyspace) == 0) continue;

        di = dictGetSafeIterator(keyspace);
        if (!di) return REDIS_ERR;

        /* SELECT DB opcode followed by the database index, so that the keys
         * coming next are attributed to this database. */
        if (rdbSaveType(rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
        if (rdbSaveLen(rdb,dbid) == -1) goto werr;

        /* One record per dictionary entry. */
        for (entry = dictNext(di); entry != NULL; entry = dictNext(di)) {
            sds keystr = dictGetKey(entry);
            robj *value = dictGetVal(entry);
            robj key;
            long long expire;

            /* Wrap the sds key in a stack-allocated string object, then
             * look up the expire time of the key in this database. */
            initStaticStringObject(key,keystr);
            expire = getExpire(db,&key);

            if (rdbSaveKeyValuePair(rdb,&key,value,expire,now) == -1)
                goto werr;
        }
        dictReleaseIterator(di);
    }
    di = NULL; /* Already released: the error path must not release it again. */

    /* EOF opcode: marks the end of the key space data. */
    if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;

    /* Trailing 8-byte CRC64 checksum. It will be zero if checksum
     * computation is disabled, the loading code skips the check in this
     * case. */
    cksum = rdb->cksum;
    memrev64ifbe(&cksum);
    if (rioWrite(rdb,&cksum,8) == 0) goto werr;
    return REDIS_OK;

werr:
    if (error) *error = errno;
    if (di) dictReleaseIterator(di);
    return REDIS_ERR;
}
每个entry(key-value)rdb持久化的核心代码:
/* Write one key-value pair to 'rdb': the optional expire time first, then
 * the value type, the key and the value.
 *
 * Returns -1 on error, 1 when the pair was written, and 0 when nothing was
 * written because the key has an expire time earlier than 'now'.
 * An 'expiretime' of -1 means the key has no expire. */
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
                        long long expiretime, long long now)
{
    int has_expire = (expiretime != -1);

    /* If this key is already expired skip it */
    if (has_expire && expiretime < now) return 0;

    /* Expire opcode (millisecond precision) followed by the expire time. */
    if (has_expire) {
        if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EXPIRETIME_MS) == -1 ||
            rdbSaveMillisecondTime(rdb,expiretime) == -1)
        {
            return -1;
        }
    }

    /* Type, key and value, in this order; stop at the first failure. */
    if (rdbSaveObjectType(rdb,val) == -1 ||
        rdbSaveStringObject(rdb,key) == -1 ||
        rdbSaveObject(rdb,val) == -1)
    {
        return -1;
    }
    return 1;
}
通过上面的源码分析得到最终rdb文件的格式如下:
REDIS // RDB协议约束的固定字符串
0006 // RDB文件格式的版本号(即REDIS_RDB_VERSION,固定4位数字)
FE 00 // 表示当前接下来的key都是db=0中的key;
FC 1506327609 // 表示key失效时间点为1506327609
0 // 表示value的类型是string类型(REDIS_RDB_TYPE_STRING);
username // key
afei // value
FF // 表示遍历完成
y73e9iq1 // checksum值
备注:
#define REDIS_RDB_TYPE_STRING 0
#define REDIS_RDB_TYPE_LIST 1
#define REDIS_RDB_TYPE_SET 2
#define REDIS_RDB_TYPE_ZSET 3
#define REDIS_RDB_TYPE_HASH 4
作者:阿飞的博客
来源:https://www.jianshu.com/p/131cf929a262
看完两件小事
如果你觉得这篇文章对你挺有启发,我想请你帮我两个小忙:
- 把这篇文章分享给你的朋友 / 交流群,让更多的人看到,一起进步,一起成长!
- 关注公众号 「方志朋」,公众号后台回复「666」 免费领取我精心整理的进阶资源教程
本文著作权归作者所有,如若转载,请注明出处
转载请注明:文章转载自「 Java极客技术学习 」https://www.javajike.com