跳转至

C++ 原生实现 MD5 哈希算法

一段不使用外部库的 MD5 实现。代码涵盖了填充、分块、四轮 64 步压缩以及小端序输出等完整流程。逻辑基本正确,但存在一处内存泄漏和若干类型安全问题。

原始代码问题

内存泄漏

add() 函数内 new unsigned int[num*16] 分配了动态数组,返回给 getMD5() 使用后从未 delete[] 释放。每次调用 getMD5() 都会泄露一块堆内存。

changeHex 参数类型

函数签名 changeHex(int a) 接受有符号整数,内部对 a 做位移和取模运算。当哈希中间值高位为 1 时(即 unsigned int 下 ≥ 0x80000000),int 将其解释为负数,右移操作行为由实现定义,且 % 取模对负数结果不直观。应改为 unsigned int

全局状态变量

atempbtempctempdtempstrlength 定义在全局作用域,导致函数不可重入——同时计算两个字符串的 MD5 会互相覆盖中间状态。对于单线程演示程序影响不大,但应知晓这一限制。

shift 宏的边界隐患

#define shift(x, n) (((x) << (n)) | ((x) >> (32-(n))))

n == 0 时,右移 32 位在 32 位无符号整数上是未定义行为。MD5 规范中旋转移位量均不为 0 或 32,实际不会触发,但作为通用位旋转宏不够健壮。

修正后代码

#include <iostream>
#include <string>
using namespace std;

#define shift(x, n) (((x) << (n)) | ((x) >> (32-(n))))
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
#define A 0x67452301
#define B 0xefcdab89
#define C 0x98badcfe
#define D 0x10325476

unsigned int strlength;
unsigned int atemp;
unsigned int btemp;
unsigned int ctemp;
unsigned int dtemp;

const unsigned int k[] = {
    0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
    0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,0x698098d8,
    0x8b44f7af,0xffff5bb1,0x895cd7be,0x6b901122,0xfd987193,
    0xa679438e,0x49b40821,0xf61e2562,0xc040b340,0x265e5a51,
    0xe9b6c7aa,0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
    0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,0xa9e3e905,
    0xfcefa3f8,0x676f02d9,0x8d2a4c8a,0xfffa3942,0x8771f681,
    0x6d9d6122,0xfde5380c,0xa4beea44,0x4bdecfa9,0xf6bb4b60,
    0xbebfbc70,0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
    0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,0xf4292244,
    0x432aff97,0xab9423a7,0xfc93a039,0x655b59c3,0x8f0ccc92,
    0xffeff47d,0x85845dd1,0x6fa87e4f,0xfe2ce6e0,0xa3014314,
    0x4e0811a1,0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391};

const unsigned int s[] = {7,12,17,22,7,12,17,22,7,12,17,22,7,
    12,17,22,5,9,14,20,5,9,14,20,5,9,14,20,5,9,14,20,
    4,11,16,23,4,11,16,23,4,11,16,23,4,11,16,23,6,10,
    15,21,6,10,15,21,6,10,15,21,6,10,15,21};

const char str16[] = "0123456789abcdef";

void mainLoop(unsigned int M[])
{
    unsigned int f, g;
    unsigned int a = atemp;
    unsigned int b = btemp;
    unsigned int c = ctemp;
    unsigned int d = dtemp;
    for (unsigned int i = 0; i < 64; i++)
    {
        if (i < 16) {
            f = F(b, c, d);
            g = i;
        } else if (i < 32) {
            f = G(b, c, d);
            g = (5 * i + 1) % 16;
        } else if (i < 48) {
            f = H(b, c, d);
            g = (3 * i + 5) % 16;
        } else {
            f = I(b, c, d);
            g = (7 * i) % 16;
        }
        unsigned int tmp = d;
        d = c;
        c = b;
        b = b + shift((a + f + k[i] + M[g]), s[i]);
        a = tmp;
    }
    atemp = a + atemp;
    btemp = b + btemp;
    ctemp = c + ctemp;
    dtemp = d + dtemp;
}

unsigned int* add(string str)
{
    unsigned int num = ((str.length() + 8) / 64) + 1;
    unsigned int *strByte = new unsigned int[num * 16];
    strlength = num * 16;
    for (unsigned int i = 0; i < num * 16; i++)
        strByte[i] = 0;
    for (unsigned int i = 0; i < str.length(); i++)
    {
        strByte[i >> 2] |= (str[i]) << ((i % 4) * 8);
    }
    strByte[str.length() >> 2] |= 0x80 << (((str.length() % 4)) * 8);
    strByte[num * 16 - 2] = str.length() * 8;
    return strByte;
}

string changeHex(unsigned int a)                    // 修正:int 改为 unsigned int
{
    unsigned int b;                                  // 修正:int 改为 unsigned int
    string str1;
    string str = "";
    for (int i = 0; i < 4; i++)
    {
        str1 = "";
        b = ((a >> i * 8) % (1 << 8)) & 0xff;
        for (int j = 0; j < 2; j++)
        {
            str1.insert(0, 1, str16[b % 16]);
            b = b / 16;
        }
        str += str1;
    }
    return str;
}

string getMD5(string source)
{
    atemp = A;
    btemp = B;
    ctemp = C;
    dtemp = D;
    unsigned int *strByte = add(source);
    for (unsigned int i = 0; i < strlength / 16; i++)
    {
        unsigned int num[16];
        for (unsigned int j = 0; j < 16; j++)
            num[j] = strByte[i * 16 + j];
        mainLoop(num);
    }
    string result = changeHex(atemp)
        .append(changeHex(btemp))
        .append(changeHex(ctemp))
        .append(changeHex(dtemp));
    delete[] strByte;                                // 新增:释放动态分配的内存
    return result;
}

int main()
{
    string ss;
//  cin >> ss;
    string s = getMD5("abc");
    cout << s;
    return 0;
}

改动集中在两处:changeHex 的参数和局部变量从 int 改为 unsigned int,消除有符号整数位移的歧义;getMD5 末尾补充 delete[] strByte,修复内存泄漏。

已知测试向量 MD5("abc") 应输出 900150983cd24fb0d6963f7d28e17f72