OpenSSL BIO源码简析

Source

1. BIO简介

相关文档

/html/man7/bio.html
/html/man3/BIO_*.html

bio - Basic I/O abstraction,即IO抽象层。

BIO有两种:

  • source/sink BIO,即数据的源头或终点(数据源/数据宿),如socket BIO、file BIO,初始化接口以BIO_s_开头;
  • filter BIO,过滤器,用来接收和传递数据,初始化接口以BIO_f_开头;

BIO chain

BIO可以组成一条链,即使是单个BIO,实质也是有一个节点的链。

一个链通常由1个source/sink BIO、1个或多个filter BIO组成。

数据从第一个BIO写入或读出,并传递到最后一个节点(通常是source/sink BIO)。

相关api:

  • BIO_push
  • BIO_free_all 释放整个链

BIO数据结构

源码位置:\crypto\bio\bio_local.h

/*
 * One BIO object, i.e. one node of a BIO chain.
 */
struct bio_st {
    
      
    // BIO_new_ex() takes both a libctx and a method; BIO_new() is the
    // wrapper that passes a NULL libctx:
    // BIO *BIO_new(const BIO_METHOD *method)
    // {
    
      
    //     return BIO_new_ex(NULL, method);
    // }
    OSSL_LIB_CTX *libctx;	// NULL: default context
    const BIO_METHOD *method;	// method table that implements this BIO
    
    
    /* bio, mode, argp, argi, argl, ret */
#ifndef OPENSSL_NO_DEPRECATED_3_0
    BIO_callback_fn callback;
#endif
    BIO_callback_fn_ex callback_ex;
    char *cb_arg;               /* first argument for the callback */
    
    // Article author's remark: using a full int for each flag looks a bit
    // wasteful; a bool or a bit-field would be more compact.
    int init;		// initialization flag (set to 1 once the BIO is initialized)
    int shutdown;
    int flags;                  /* extra storage */
    int retry_reason;
    int num;
    void *ptr;		// method-private data, set via BIO_set_data()
    
    // A BIO chain is essentially a doubly linked list.
    struct bio_st *next_bio;    /* used by filter BIOs */
    struct bio_st *prev_bio;    /* used by filter BIOs */
    
    CRYPTO_REF_COUNT references;	// reference count, decremented by BIO_free()
    uint64_t num_read;
    uint64_t num_write;
    CRYPTO_EX_DATA ex_data;
    CRYPTO_RWLOCK *lock;	// thread read/write lock
};

BIO_METHOD数据结构

源码路径:\include\internal\bio.h

/*
 * Method table describing one kind of BIO. Apart from the type and the
 * name, every member is a function pointer.
 */
struct bio_method_st {
    
      
    int type;   /* BIO_TYPE_* value, e.g. BIO_TYPE_BASE64 */
    char *name; /* human-readable name, e.g. "base64 encoding" */
    /* size_t-based write entry; this is what bio_write_intern() calls */
    int (*bwrite) (BIO *, const char *, size_t, size_t *);
    /* int-length write; bwrite_conv() forwards to this slot */
    int (*bwrite_old) (BIO *, const char *, int);
    /* read counterparts with the same two signatures as the write pair */
    int (*bread) (BIO *, char *, size_t, size_t *);
    int (*bread_old) (BIO *, char *, int);
    int (*bputs) (BIO *, const char *);
    int (*bgets) (BIO *, char *, int);
    /* control entry invoked by BIO_ctrl() (used by BIO_flush, BIO_push, ...) */
    long (*ctrl) (BIO *, int, long, void *);
    /* optional; called by BIO_new_ex(), a zero return makes creation fail */
    int (*create) (BIO *);
    /* optional; called by BIO_free() just before the BIO itself is released */
    int (*destroy) (BIO *);
    long (*callback_ctrl) (BIO *, int, BIO_info_cb *);
};

该结构除了类型和名称,其余均是函数指针。

2. Base64示例分析

借用官网Base64示例:https://www.openssl.org/docs/man3.0/man3/BIO_f_base64.html,它将"Hello World \n"的base64编码结果输出到stdout:

#include <cstdio>
#include <cstring>
#include <iostream>
#include <openssl/bio.h>
#include <openssl/evp.h>
int main()
{
    
      
	BIO* bio, * b64;
	char message[] = "Hello World \n";

	b64 = BIO_new(BIO_f_base64());
	bio = BIO_new_fp(stdout, BIO_NOCLOSE);
	BIO_push(b64, bio);
	BIO_write(b64, message, strlen(message));
	BIO_flush(b64);
    // SGVsbG8gV29ybGQgCg==
    
	BIO_free_all(b64);
    
	getchar();
	return 0;
}

用vscode在源码目录搜索“base64”,并没有找到实现源码,于是全局搜索编码表,定位到源码路径:

// \crypto\evp\encode.c
/*
 * The Base64 alphabet: 64 characters ('A'-'Z', 'a'-'z', '0'-'9', '+', '/')
 * plus the terminating NUL, hence 65 bytes.
 */
static const unsigned char data_bin2ascii[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

把encode.c拖到vs中,在引用编码表的地方下断点,调试示例程序,成功断下:

请添加图片描述

BIO_flush调用栈如下:

libcrypto-3.dll!evp_encodeblock_int(evp_Encode_Ctx_st * ctx, unsigned char * t, const unsigned char * f, int dlen) Line 238	C
libcrypto-3.dll!EVP_EncodeFinal(evp_Encode_Ctx_st * ctx, unsigned char * out, int * outl) Line 222	C
libcrypto-3.dll!b64_ctrl(bio_st * b, int cmd, long num, void * ptr) Line 506	C
libcrypto-3.dll!BIO_ctrl(bio_st * b, int cmd, long larg, void * parg) Line 579	C
TestOpenSSL.exe!main() Line 48	C++

EVP是OpenSSL的算法实现接口,结合evp文档(/html/man7/evp.html),这里正是base64编解码逻辑:

The EVP_EncodeXXX and EVP_DecodeXXX functions implement base 64 encoding and decoding.

初始化

b64 = BIO_new(BIO_f_base64());
bio = BIO_new_fp(stdout, BIO_NOCLOSE);

BIO_new_fp(),内部其实是调用了BIO_new(BIO_s_file())初始化一个source/sink BIO。所以只分析第一个base64 filter BIO。

BIO_f_base64()返回一个base64的静态BIO_METHOD

// \crypto\evp\bio_b64.c
/*
 * Method table of the base64 filter BIO. The initializers are positional,
 * so each entry fills the struct bio_method_st member at the same position.
 */
static const BIO_METHOD methods_b64 = {
    
      
    BIO_TYPE_BASE64,            /* type */
    "base64 encoding",          /* name */
    bwrite_conv,                /* bwrite: adapter that calls bwrite_old */
    b64_write,                  /* bwrite_old */
    bread_conv,                 /* bread */
    b64_read,                   /* bread_old */
    b64_puts,                   /* bputs */
    NULL,                       /* bgets: no b64_gets is provided */
    b64_ctrl,                   /* ctrl */
    b64_new,                    /* create */
    b64_free,                   /* destroy */
    b64_callback_ctrl,          /* callback_ctrl */
};


/*
 * Returns a pointer to the static base64 filter method table; pass it to
 * BIO_new() to create a base64 filter BIO.
 */
const BIO_METHOD *BIO_f_base64(void)
{
    
      
    return &methods_b64;
}

通过BIO_TYPE_BASE64这个宏type,可以在\include\openssl\bio.h定位到其它method type:

/*
 * At the source level there are actually three classes of BIO.
 * Each concrete type below is a small id OR'ed with its class bit(s).
 */
/* There are the classes of BIOs */
# define BIO_TYPE_DESCRIPTOR     0x0100 /* socket, fd, connect or accept */
# define BIO_TYPE_FILTER         0x0200
# define BIO_TYPE_SOURCE_SINK    0x0400

/* These are the 'types' of BIOs */
# define BIO_TYPE_NONE             0
# define BIO_TYPE_MEM            ( 1|BIO_TYPE_SOURCE_SINK)
# define BIO_TYPE_FILE           ( 2|BIO_TYPE_SOURCE_SINK)

# define BIO_TYPE_FD             ( 4|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
// ... (other types omitted in this excerpt)
# define BIO_TYPE_BASE64         (11|BIO_TYPE_FILTER)

BIO_new则根据base64 method初始化filter BIO:

请添加图片描述

method->create即b64_new(),用于初始化bio->ptr,使其指向BIO_B64_CTX(b64_ctx):

// \crypto\evp\bio_b64.c
/*
 * Private state of a base64 filter BIO. b64_new() stores a pointer to one
 * of these in the BIO via BIO_set_data().
 */
typedef struct b64_struct {
    
      
    /*
     * BIO *bio; moved to the BIO structure
     */
    int buf_len;                /* presumably bytes currently held in buf -- confirm */
    int buf_off;                /* presumably offset of unconsumed data in buf -- confirm */
    int tmp_len;                /* used to find the start when decoding */
    int tmp_nl;                 /* If true, scan until '\n' */
    int encode;
    int start;                  /* have we started decoding yet? */
    int cont;                   /* <= 0 when finished */
    EVP_ENCODE_CTX *base64;		/* EVP encode/decode context */
    char buf[EVP_ENCODE_LENGTH(B64_BLOCK_SIZE) + 10];
    char tmp[B64_BLOCK_SIZE];
} BIO_B64_CTX;

函数栈如下:

>	libcrypto-3.dll!b64_new(bio_st * bi) Line 71	C
    	    BIO_B64_CTX *ctx;

            //...

            BIO_set_data(bi, ctx); // set bi->ptr
            BIO_set_init(bi, 1);	// initialized 
 	libcrypto-3.dll!BIO_new_ex(ossl_lib_ctx_st * libctx, const bio_method_st * method) Line 104	C
        	if (method->create != NULL && !method->create(bio)) {
    
      
                // ...
                goto err;
            }
            if (method->create == NULL)
                bio->init = 1;	// 若没有单独的create初始化函数,则直接设置init标志
 	libcrypto-3.dll!BIO_new(const bio_method_st * method) Line 122	C
        	return BIO_new_ex(NULL, method);
 	TestOpenSSL.exe!main() Line 44	C++

构造BIO链

源码位置:\crypto\bio\bio_lib.c

// Call site in the example: BIO_push(b64, bio);
/*
 * Appends bio to the end of the chain that starts at b.
 *
 * Returns bio unchanged when b is NULL; otherwise returns b, the head of
 * the (now longer) chain.
 */
BIO *BIO_push(BIO *b, BIO *bio)
{
    
      
    BIO *lb;

    if (b == NULL)
        return bio;
    /* walk to the last node of b's chain */
    lb = b;
    while (lb->next_bio != NULL)
        lb = lb->next_bio;
    /* link both directions; bio may be NULL, in which case only next is set */
    lb->next_bio = bio;
    if (bio != NULL)
        bio->prev_bio = lb;
    /* called to do internal processing */
    BIO_ctrl(b, BIO_CTRL_PUSH, 0, lb);
    return b;
}

在BIO链表中,stdout source/sink BIO是在 base64 filter BIO之后的。

写数据

BIO_write(b64, message, strlen(message));
BIO_flush(b64);

再看一下methods_b64:

/* Abbreviated excerpt of the table: only the write-related slots are shown. */
static const BIO_METHOD methods_b64 = {
    
      
    BIO_TYPE_BASE64,
    "base64 encoding",
    bwrite_conv,	// fills slot: int (*bwrite) (BIO *, const char *, size_t, size_t *);
    b64_write,		// fills slot: int (*bwrite_old) (BIO *, const char *, int);
    // ... (remaining slots omitted)
}

调用BIO_write,其实是先调用base64 method的bwrite(即bwrite_conv),再由它转调bwrite_old(即b64_write),形成如下函数栈:

libcrypto-3.dll!EVP_EncodeInit(evp_Encode_Ctx_st * ctx) Line 156	C
libcrypto-3.dll!b64_write(bio_st * b, const char * in, int inl) Line 346	C
libcrypto-3.dll!bwrite_conv(bio_st * bio, const char * data, unsigned int datal, unsigned int * written) Line 77	C
        ret = bio->method->bwrite_old(bio, data, (int)datal);
libcrypto-3.dll!bio_write_intern(bio_st * b, const void * data, unsigned int dlen, unsigned int * written) Line 362	C
        ret = b->method->bwrite(b, data, dlen, &local_written);
libcrypto-3.dll!BIO_write(bio_st * b, const void * data, int dlen) Line 384	C
TestOpenSSL.exe!main() Line 47	C++

但经过调试,执行BIO_write后标准输出(即屏幕)并没有回显,直到调用BIO_flush刷新缓冲区后才有回显(其调用栈已在前文贴出)。BIO_flush底层是通过BIO_ctrl()实现的,文档如下:

https://www.openssl.org/docs/man3.0/man3/BIO_flush.html
BIO_flush() normally writes out any internally buffered data, in some cases it is used to signal EOF and that no more data will be written.

函数栈如下:

libcrypto-3.dll!b64_write(bio_st * b, const char * in, int inl) Line 366	C
libcrypto-3.dll!b64_ctrl(bio_st * b, int cmd, long num, void * ptr) Line 490	C
libcrypto-3.dll!BIO_ctrl(bio_st * b, int cmd, long larg, void * parg) Line 579	C
    ret = b->method->ctrl(b, cmd, larg, parg);	// b64_ctrl()
TestOpenSSL.exe!main() Line 48	C++

free

BIO_free_all(b64);

这个函数的作用,猜也能猜得到:遍历链表,逐个释放节点;若某个节点释放前的引用计数大于1(说明还有别处在使用),则就此停止。

// \crypto\bio\bio_lib.c
/*
 * Releases a whole BIO chain, starting at bio and following next_bio.
 * Stops early at the first node whose reference count was above 1 before
 * the BIO_free() call.
 */
void BIO_free_all(BIO *bio)
{
    
      
    BIO *b;
    int ref;

    while (bio != NULL) {
    
      
        b = bio;	// start releasing from the first node
        /* read the count and the successor before b may be freed */
        ref = b->references;
        bio = bio->next_bio;
        BIO_free(b);
        /* 
        	Since ref count > 1, don't free anyone else.
        	i.e. someone else is still using it, so leave the rest alone
        */
        if (ref > 1)
            break;
    }
}

// There is quite a lot to release here.
/*
 * Drops one reference to a and destroys it when the count reaches zero.
 *
 * Returns 0 if a is NULL, if the reference decrement fails, or if the
 * BIO_CB_FREE callback returns <= 0; returns 1 otherwise (including the
 * case where other references remain and nothing is destroyed).
 */
int BIO_free(BIO *a)
{
    
      
    int ret;

    if (a == NULL)
        return 0;
	
    // decrement the reference count; ret receives the new value
    if (CRYPTO_DOWN_REF(&a->references, &ret, a->lock) <= 0)
        return 0;

    REF_PRINT_COUNT("BIO", a);
    /* still referenced elsewhere: nothing more to do */
    if (ret > 0)
        return 1;
    REF_ASSERT_ISNT(ret < 0);

    /* give an installed callback the chance to veto the free */
    if (HAS_CALLBACK(a)) {
    
      
        ret = (int)bio_call_callback(a, BIO_CB_FREE, NULL, 0, 0, 0L, 1L, NULL);
        if (ret <= 0)
            return 0;
    }

    /* method-specific teardown first, then the generic parts of the BIO */
    if ((a->method != NULL) && (a->method->destroy != NULL))
        a->method->destroy(a);	// b64_free() for the base64 filter

    CRYPTO_free_ex_data(CRYPTO_EX_INDEX_BIO, a, &a->ex_data);

    CRYPTO_THREAD_lock_free(a->lock);

    OPENSSL_free(a);

    return 1;
}