从零开始之驱动开发、linux驱动（六十六、内核调试篇--printk原理）

上一节我们已经看到了console的注册，printk的基础就是console，所以本节我们主要看内核中printk的实现。


/**
 * printk - emit a kernel log message
 * @fmt: printf-style format string; its arguments follow
 *
 * May be called from any context.
 *
 * The caller tries to take the console_lock. On success the message is
 * logged and the console drivers are called. On failure the message is only
 * placed in the log buffer; whoever currently holds console_sem will notice
 * it in console_unlock() and push it to the consoles before dropping the
 * lock.
 *
 * Because of this deferred printing, console_loglevel is evaluated when the
 * text is actually printed, not when printk() is called. Code that calls
 * printk() and then changes console_loglevel may therefore misbehave.
 *
 * See printf(3), and the vsnprintf() documentation for the format string
 * extensions over C99.
 *
 * Return: the value returned by vprintk_func().
 */
asmlinkage __visible int printk(const char *fmt, ...)
{
	va_list ap;
	int printed;

	va_start(ap, fmt);
	printed = vprintk_func(fmt, ap);
	va_end(ap);

	return printed;
}

通过printk的注释我们可以看到下面几个特点：

它可以从任何上下文中调用。
我们尝试获取console_lock。如果我们成功，那很容易 - 我们记录了输出并调用控制台驱动程序。
如果我们无法获得信号量，我们将输出放入日志缓冲区并返回。
console_sem 的当前持有者会在 console_unlock() 中注意到新的输出，
并在释放锁之前将其发送到控制台。

主要实现如下：


/*
 * vprintk_func - route a message to the printk backend for this CPU context
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
 *
 * The per-CPU printk_context flags are tested in this order: NMI-direct,
 * NMI, safe. The first case that applies handles the message; if none does,
 * the message goes to vprintk_default().
 *
 * Return: the length reported by vprintk_store() on the NMI-direct path,
 * otherwise whatever the selected backend returns.
 */
__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
	/*
	 * Even in NMI, prefer the main log buffer, but only when logbuf_lock
	 * can be taken without waiting. Console drivers might have locks of
	 * their own, so console output is deferred rather than done here.
	 */
	if (this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) {
		if (raw_spin_trylock(&logbuf_lock)) {
			int stored;

			stored = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0,
					       fmt, args);
			raw_spin_unlock(&logbuf_lock);
			defer_console_output();
			return stored;
		}
	}

	/* NMI with logbuf_lock taken, or NMI in safe mode: use the extra buffer. */
	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
		return vprintk_nmi(fmt, args);

	/* Safe mode: use the extra buffer so printk cannot deadlock on recursion. */
	if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
		return vprintk_safe(fmt, args);

	/* Ordinary context. */
	return vprintk_default(fmt, args);
}

可以看到，这个实现中判断了各种上下文，就像注释中说的那样，能够获取console_lock，表示现在可以直接打印，如果不能获取，那就放入缓冲区，待当前持有者执行console_unlock的时候会输出缓冲区的内容。

这里我们以最常见的默认情况来分析。

	return vprintk_default(fmt, args);


/*
 * vprintk_default - printk backend for the ordinary (non-NMI, non-safe) case
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
 *
 * Return: the result of vkdb_printf() when the message is redirected to kdb,
 * otherwise the result of vprintk_emit().
 */
int vprintk_default(const char *fmt, va_list args)
{
#ifdef CONFIG_KGDB_KDB
	/* Let kdb take over printk() output, but never recursively. */
	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
		return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
#endif

	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
}

说到这里我们要加入一个东西，就是内核中printk有打印等级

/*
 * Integer equivalents of KERN_<LEVEL>.
 *
 * 0..7 are the real message levels (0 is the most urgent, 7 is debug).
 * The two negative values are markers rather than levels: vprintk_emit()
 * turns LOGLEVEL_SCHED into LOGLEVEL_DEFAULT, and vprintk_store() replaces
 * LOGLEVEL_DEFAULT with the level taken from the message prefix or with
 * default_message_loglevel.
 */
#define LOGLEVEL_SCHED		-2	/* Deferred messages from sched code
					 * are set to this special level */
#define LOGLEVEL_DEFAULT	-1	/* default (or last) loglevel */
#define LOGLEVEL_EMERG		0	/* system is unusable */
#define LOGLEVEL_ALERT		1	/* action must be taken immediately */
#define LOGLEVEL_CRIT		2	/* critical conditions */
#define LOGLEVEL_ERR		3	/* error conditions */
#define LOGLEVEL_WARNING	4	/* warning conditions */
#define LOGLEVEL_NOTICE		5	/* normal but significant condition */
#define LOGLEVEL_INFO		6	/* informational */
#define LOGLEVEL_DEBUG		7	/* debug-level messages */

使用时我们通常使用下面这几个


/*
 * String prefixes placed in front of a printk() format string: KERN_SOH
 * followed by one level character. vprintk_store() strips each such
 * two-character header; '0'..'7' select the numeric level, while 'd' keeps
 * the default level.
 */
#define KERN_EMERG	KERN_SOH "0"	/* system is unusable */
#define KERN_ALERT	KERN_SOH "1"	/* action must be taken immediately */
#define KERN_CRIT	KERN_SOH "2"	/* critical conditions */
#define KERN_ERR	KERN_SOH "3"	/* error conditions */
#define KERN_WARNING	KERN_SOH "4"	/* warning conditions */
#define KERN_NOTICE	KERN_SOH "5"	/* normal but significant condition */
#define KERN_INFO	KERN_SOH "6"	/* informational */
#define KERN_DEBUG	KERN_SOH "7"	/* debug-level messages */

#define KERN_DEFAULT	KERN_SOH "d"	/* the default kernel loglevel */

	printk(KERN_INFO "Serial: 21285 driver\n");



        printk(KERN_ERR "CRC mismatch\n");

下面就是输出函数


/*
 * vprintk_emit - store one message in the log buffer and, when allowed,
 * flush pending output to the consoles.
 * @facility: passed through to vprintk_store()
 * @level:    log level; LOGLEVEL_SCHED marks a message from scheduler code
 * @dict:     optional dictionary data, passed through to vprintk_store()
 * @dictlen:  length of @dict
 * @fmt:      printf-style format string
 * @args:     arguments for @fmt
 *
 * Return: the length reported by vprintk_store().
 */
asmlinkage int vprintk_emit(int facility, int level,
			    const char *dict, size_t dictlen,
			    const char *fmt, va_list args)
{
	int printed_len;
	bool in_sched = false;
	unsigned long flags;


    /*
     * A LOGLEVEL_SCHED message is stored at the default level; in_sched
     * records that the consoles must not be driven from this call.
     */
	if (level == LOGLEVEL_SCHED) {
		level = LOGLEVEL_DEFAULT;
		in_sched = true;
	}

    /*
     * Delay helpers (not shown here). NOTE(review): presumably they pace
     * output for slow consoles - confirm against their definitions.
     */
	boot_delay_msec(level);
	printk_delay();

	/* This stops the holder of console_sem just where we want him */
	logbuf_lock_irqsave(flags);

    /*
     * Format the message and store it in the log buffer; must happen
     * under logbuf_lock.
     */
	printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
	logbuf_unlock_irqrestore(flags);

	/* If called from the scheduler, we can not call up(). */
    /* Outside the scheduler we may print right away; otherwise the output is left for later. */
	if (!in_sched) {
		/*
		 * Disable preemption to avoid being preempted while holding
		 * console_sem which would prevent anyone from printing to
		 * console
		 */
		preempt_disable();
		/*
		 * Try to acquire and then immediately release the console
		 * semaphore.  The release will print out buffers and wake up
		 * /dev/kmsg and syslog() users.
		 */
		if (console_trylock_spinning())
			console_unlock();
		preempt_enable();
	}

	/* Runs on both paths, including the in_sched one. */
	wake_up_klogd();
	return printed_len;
}

上面函数我们关注两个点

	printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);


        console_unlock();

第一点

/* Bytes set aside for the prefix; console_unlock() adds them back to its text buffer. */
#define PREFIX_MAX		32
/* Size of vprintk_store()'s formatting buffer: 1024 - 32 = 992 bytes. */
#define LOG_LINE_MAX		(1024 - PREFIX_MAX)



/*
 * vprintk_store - format one message and pass it on to log_output()
 * @facility: log facility; level prefixes are only parsed when this is 0
 * @level:    requested level, or LOGLEVEL_DEFAULT to take it from the prefix
 * @dict:     optional dictionary data (may be NULL)
 * @dictlen:  length of @dict
 * @fmt:      printf-style format string
 * @args:     arguments for @fmt
 *
 * Must be called under logbuf_lock: the text is formatted into a single
 * static buffer.
 *
 * Return: the value returned by log_output().
 */
int vprintk_store(int facility, int level,
		  const char *dict, size_t dictlen,
		  const char *fmt, va_list args)
{
	static char textbuf[LOG_LINE_MAX];
	enum log_flags lflags = 0;
	char *msg_start = textbuf;
	size_t msg_len;

	/*
	 * Format first: the syslog prefix may itself arrive through one of
	 * the format arguments, so it can only be parsed afterwards.
	 */
	msg_len = vscnprintf(msg_start, sizeof(textbuf), fmt, args);

	/* A trailing newline is dropped from the text and kept as a flag. */
	if (msg_len > 0 && msg_start[msg_len - 1] == '\n') {
		lflags |= LOG_NEWLINE;
		msg_len--;
	}

	/*
	 * Peel off every leading two-character syslog header, picking up the
	 * log level or the control flags it encodes.
	 */
	if (facility == 0) {
		for (;;) {
			int code = printk_get_level(msg_start);
			int is_digit;

			if (code == 0)
				break;

			is_digit = (code >= '0' && code <= '7');

			/* A numeric prefix only applies when no level was passed in. */
			if (is_digit && level == LOGLEVEL_DEFAULT)
				level = code - '0';

			if (is_digit || code == 'd')	/* 'd' is KERN_DEFAULT */
				lflags |= LOG_PREFIX;
			else if (code == 'c')		/* 'c' is KERN_CONT */
				lflags |= LOG_CONT;

			msg_start += 2;
			msg_len -= 2;
		}
	}

	/* Still no explicit level: fall back to the configured default. */
	if (level == LOGLEVEL_DEFAULT)
		level = default_message_loglevel;

	/* A message carrying a dictionary is flagged as prefixed and newline-terminated. */
	if (dict)
		lflags |= LOG_PREFIX|LOG_NEWLINE;

	return log_output(facility, level, lflags,
			  dict, dictlen, msg_start, msg_len);
}

内核定义了992字节（LOG_LINE_MAX = 1024 - PREFIX_MAX）的静态缓冲区，用来保存printk格式化后的打印字符。


/*
 * log_output - add a formatted message to the continuation buffer or the log
 *
 * A fragment is buffered through cont_add() when possible; otherwise any
 * buffered line is flushed and the text goes to log_store().
 *
 * Return: @text_len when the text was buffered by cont_add(), 0 for an empty
 * continuation that could not be added, otherwise the result of log_store().
 */
static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
{
	const int is_cont = (lflags & LOG_CONT) != 0;

	if (cont.len) {
		/*
		 * A line is already buffered. Append to it only if this is a
		 * continuation written by the same process.
		 */
		if (cont.owner == current && is_cont &&
		    cont_add(facility, level, lflags, text, text_len))
			return text_len;

		/* Could not append: make sure the buffered line is flushed. */
		cont_flush();
	}

	/* An empty continuation that could not be added only triggers the flush. */
	if (is_cont && text_len == 0)
		return 0;

	/* No terminating newline: try to buffer the current line. */
	if (!(lflags & LOG_NEWLINE) &&
	    cont_add(facility, level, lflags, text, text_len))
		return text_len;

	/* Otherwise store it as a record in the log. */
	return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
}

上面就是对一些标志的处理：如果这一行还没有结束（没有换行符），或者是同一进程的续写（LOG_CONT），就先尝试用cont_add把它追加到cont缓冲区；如果追加失败（例如缓冲区快要溢出了），就先用cont_flush把cont缓冲区里已有的内容刷新出去，再通过log_store把这条打印信息写入日志缓冲区。

刷新函数用的是log_store，普通写也是这个，就参数不一样而已。

/*
 * cont_flush - write the buffered continuation line to the log
 *
 * Does nothing when the continuation buffer is empty. Otherwise the buffered
 * text is stored with the facility, level, flags and timestamp saved in
 * cont, and the buffer is marked empty.
 */
static void cont_flush(void)
{
	if (cont.len != 0) {
		log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec,
			  NULL, 0, cont.buf, cont.len);
		cont.len = 0;
	}
}


/* insert record into the buffer, discard old ones, update heads */
/* 将记录插入缓冲区，丢弃旧记录，更新头部索引 */
/*
 * log_store - append one record to the log buffer
 * @facility: stored in the record
 * @level:    stored in the record, masked to its low 3 bits
 * @flags:    stored in the record, masked with 0x1f
 * @ts_nsec:  timestamp to record; 0 means take local_clock() now
 * @dict:     dictionary bytes, copied right after the text
 * @dict_len: length of @dict
 * @text:     message text
 * @text_len: length of @text
 *
 * Return: the text length stored in the record (including the truncation
 * marker, if one was appended), or 0 when no space could be made.
 */
static int log_store(int facility, int level,
		     enum log_flags flags, u64 ts_nsec,
		     const char *dict, u16 dict_len,
		     const char *text, u16 text_len)
{
	struct printk_log *msg;
	u32 size, pad_len;
	u16 trunc_msg_len = 0;

	/* number of '\0' padding bytes to next message */
	size = msg_used_size(text_len, dict_len, &pad_len);

	if (log_make_free_space(size)) {
		/* truncate the message if it is too long for empty buffer */
		size = truncate_msg(&text_len, &trunc_msg_len,
				    &dict_len, &pad_len);
		/* survive when the log buffer is too small for trunc_msg */
		if (log_make_free_space(size))
			return 0;
	}

	if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
		/*
		 * This message + an additional empty header does not fit
		 * at the end of the buffer. Add an empty header with len == 0
		 * to signify a wrap around.
		 */
		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
		log_next_idx = 0;
	}

	/* fill message */
	msg = (struct printk_log *)(log_buf + log_next_idx);
	memcpy(log_text(msg), text, text_len);
	msg->text_len = text_len;
	if (trunc_msg_len) {
		/* append the truncation marker right after the (shortened) text */
		memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
		msg->text_len += trunc_msg_len;
	}
	/* log_dict() locates the dictionary via msg->text_len, so that must already be final */
	memcpy(log_dict(msg), dict, dict_len);
	msg->dict_len = dict_len;
	msg->facility = facility;
	msg->level = level & 7;
	msg->flags = flags & 0x1f;
	if (ts_nsec > 0)
		msg->ts_nsec = ts_nsec;
	else
		msg->ts_nsec = local_clock();
	/* zero the padding that follows the dictionary */
	memset(log_dict(msg) + dict_len, 0, pad_len);
	msg->len = size;

	/* insert message */
	log_next_idx += msg->len;
	log_next_seq++;

	return msg->text_len;
}

上面这个主要就是一些数据拷贝和标记位置的移动。

现在所有的log信息都在数组缓冲区中。

接下来就看另一个，unlock了。


/**
 * console_unlock - unlock the console system
 *
 * Releases the console_lock which the caller holds on the console system
 * and the console driver list.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk().  If this is the case, console_unlock(); emits
 * the output prior to releasing the lock.
 *
 * If there is output waiting, we wake /dev/kmsg and syslog() users.
 *
 * console_unlock(); may be called from any context.
 */
void console_unlock(void)
{
	/* Static formatting buffers: one for extended consoles, one for regular ones. */
	static char ext_text[CONSOLE_EXT_LOG_MAX];
	static char text[LOG_LINE_MAX + PREFIX_MAX];
	unsigned long flags;
	bool do_cond_resched, retry;

	/* Suspended console: just release the semaphore, print nothing. */
	if (console_suspended) {
		up_console_sem();
		return;
	}

	/*
	 * Console drivers are called with interrupts disabled, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable.  Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system.
	 *
	 * console_trylock() is not able to detect the preemptive
	 * context reliably. Therefore the value must be stored before
	 * and cleared after the "again" goto label.
	 */
	do_cond_resched = console_may_schedule;
again:
	console_may_schedule = 0;

	/*
	 * We released the console_sem lock, so we need to recheck if
	 * cpu is online and (if not) is there at least one CON_ANYTIME
	 * console.
	 */
	if (!can_use_console()) {
		console_locked = 0;
		up_console_sem();
		return;
	}

	/* One iteration per log record that still has to reach the console. */
	for (;;) {
		struct printk_log *msg;
		size_t ext_len = 0;
		size_t len;

		printk_safe_enter_irqsave(flags);
		raw_spin_lock(&logbuf_lock);
		if (console_seq < log_first_seq) {
			/* The console cursor fell behind the oldest record: report the gap. */
			len = sprintf(text, "** %u printk messages dropped **\n",
				      (unsigned)(log_first_seq - console_seq));

			/* messages are gone, move to first one */
			console_seq = log_first_seq;
			console_idx = log_first_idx;
		} else {
			len = 0;
		}
skip:
		/*
		 * Nothing left to print. Note that this leaves the loop with
		 * logbuf_lock still held; it is dropped after the loop.
		 */
		if (console_seq == log_next_seq)
			break;

		msg = log_from_idx(console_idx);
		if (suppress_message_printing(msg->level)) {
			/*
			 * Skip record we have buffered and already printed
			 * directly to the console when we received it, and
			 * record that has level above the console loglevel.
			 */
			console_idx = log_next(console_idx);
			console_seq++;
			goto skip;
		}

		/* Format the record after any "dropped" notice already in text. */
		len += msg_print_text(msg,
				console_msg_format & MSG_FORMAT_SYSLOG,
				text + len,
				sizeof(text) - len);
		/* The extended form is only built when extended consoles are registered. */
		if (nr_ext_console_drivers) {
			ext_len = msg_print_ext_header(ext_text,
						sizeof(ext_text),
						msg, console_seq);
			ext_len += msg_print_ext_body(ext_text + ext_len,
						sizeof(ext_text) - ext_len,
						log_dict(msg), msg->dict_len,
						log_text(msg), msg->text_len);
		}
		/* Advance the console cursor before dropping logbuf_lock. */
		console_idx = log_next(console_idx);
		console_seq++;
		raw_spin_unlock(&logbuf_lock);

		/*
		 * While actively printing out messages, if another printk()
		 * were to occur on another CPU, it may wait for this one to
		 * finish. This task can not be preempted if there is a
		 * waiter waiting to take over.
		 */
		console_lock_spinning_enable();

		stop_critical_timings();	/* don't trace print latency */
		call_console_drivers(ext_text, ext_len, text, len);
		start_critical_timings();

		/*
		 * Returns without up_console_sem().
		 * NOTE(review): presumably the lock has been handed over to
		 * the waiter described above - confirm in the helper.
		 */
		if (console_lock_spinning_disable_and_check()) {
			printk_safe_exit_irqrestore(flags);
			return;
		}

		printk_safe_exit_irqrestore(flags);

		if (do_cond_resched)
			cond_resched();
	}

	console_locked = 0;

	/* Release the exclusive_console once it is used */
	if (unlikely(exclusive_console))
		exclusive_console = NULL;

	/* Drops the logbuf_lock taken in the final loop iteration. */
	raw_spin_unlock(&logbuf_lock);

	up_console_sem();

	/*
	 * Someone could have filled up the buffer again, so re-check if there's
	 * something to flush. In case we cannot trylock the console_sem again,
	 * there's a new owner and the console_unlock() from them will do the
	 * flush, no worries.
	 */
	raw_spin_lock(&logbuf_lock);
	retry = console_seq != log_next_seq;
	raw_spin_unlock(&logbuf_lock);
	/* Pairs with the printk_safe_enter_irqsave() of the final loop iteration. */
	printk_safe_exit_irqrestore(flags);

	if (retry && console_trylock())
		goto again;
}

这个函数我们不仔细分析，看主要部分内容。

定义了很大的缓冲区。

/* Size in bytes of console_unlock()'s ext_text buffer. */
#define CONSOLE_EXT_LOG_MAX	8192

/* console_unlock()'s text buffer is LOG_LINE_MAX + PREFIX_MAX = 1024 bytes. */
#define PREFIX_MAX		32
#define LOG_LINE_MAX		(1024 - PREFIX_MAX)

void console_unlock(void)
{
	static char ext_text[CONSOLE_EXT_LOG_MAX];
	static char text[LOG_LINE_MAX + PREFIX_MAX];

    ......
}

下面这些变量用来标记log记录的位置：缓冲区中第一条记录的索引和序列号、下一条要写入记录的索引和序列号，以及console下一条要输出记录的索引和序列号等。

/*
 * Cursors into the log buffer. Most come as a pair: a 64-bit sequence
 * number and a 32-bit byte offset into log_buf.
 */

/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
static size_t syslog_partial;

/* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
static u32 log_first_idx;

/*
 * index and sequence number of the next record to store in the buffer;
 * both are advanced by log_store()
 */
static u64 log_next_seq;
static u32 log_next_idx;

/*
 * the next printk record to write to the console; advanced by
 * console_unlock() for every record it prints or skips
 */
static u64 console_seq;
static u32 console_idx;

/* the next printk record to read after the last 'clear' command */
static u64 clear_seq;
static u32 clear_idx;

最大的log缓冲区，以及这个缓冲区的一些基本操作

/* Size of the built-in log buffer: 2^CONFIG_LOG_BUF_SHIFT bytes. */
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* Built-in storage for the log records, aligned to LOG_ALIGN. */
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
/*
 * Buffer and length actually used by the logging code; they start out
 * referring to the built-in buffer above.
 */
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;



/*
 * log_buf_addr_get - expose the start address of the log buffer
 *
 * Return: the current value of log_buf.
 */
char *log_buf_addr_get(void)
{
	char *base = log_buf;

	return base;
}

/*
 * log_buf_len_get - expose the size of the log buffer
 *
 * Return: the current value of log_buf_len.
 */
u32 log_buf_len_get(void)
{
	u32 size = log_buf_len;

	return size;
}

/*
 * log_text - locate the human-readable text of a record
 * @msg: record header
 *
 * Return: pointer to the byte right after the struct printk_log header,
 * which is where the text is stored.
 */
static char *log_text(const struct printk_log *msg)
{
	char *record = (char *)msg;

	return record + sizeof(struct printk_log);
}

/*
 * log_dict - locate the optional key/value dictionary of a record
 * @msg: record header
 *
 * Return: pointer to the byte that follows the header and msg->text_len
 * bytes of text.
 */
static char *log_dict(const struct printk_log *msg)
{
	char *text_start = (char *)msg + sizeof(struct printk_log);

	return text_start + msg->text_len;
}

/*
 * log_from_idx - get the record stored at byte offset @idx of log_buf
 * @idx: offset of a valid record
 *
 * A record whose len is 0 is the end-of-buffer marker; in that case the
 * record at the start of the buffer is returned instead.
 */
static struct printk_log *log_from_idx(u32 idx)
{
	struct printk_log *rec = (struct printk_log *)(log_buf + idx);

	return rec->len ? rec : (struct printk_log *)log_buf;
}

/*
 * log_next - get the offset of the record that follows the one at @idx
 * @idx: offset of a valid record
 *
 * Normally this is @idx plus the record's length. A record with len == 0 is
 * the end-of-buffer marker: the record at the start of the buffer then
 * counts as the current one, and the offset of the one after it is returned.
 */
static u32 log_next(u32 idx)
{
	struct printk_log *rec = (struct printk_log *)(log_buf + idx);

	if (rec->len)
		return idx + rec->len;

	/* wrapped: the current record sits at offset 0 */
	rec = (struct printk_log *)log_buf;
	return rec->len;
}

知道上面的操作后，我们看下面，打印等级是怎么处理的。

skip:
		if (console_seq == log_next_seq)
			break;

		msg = log_from_idx(console_idx);        // fetch the record at the console cursor
		if (suppress_message_printing(msg->level)) {    // filter by log level
			/*
			 * Skip record we have buffered and already printed
			 * directly to the console when we received it, and
			 * record that has level above the console loglevel.
			 */
            /* Suppressed record: move on to the next log record without printing this one. */
			console_idx = log_next(console_idx);
			console_seq++;
			goto skip;
		}


/*
 * suppress_message_printing - decide whether a record is kept off the console
 * @level: level of the record
 *
 * Return: true when @level is numerically greater than or equal to
 * console_loglevel and ignore_loglevel is not set; false otherwise.
 */
static bool suppress_message_printing(int level)
{
	if (ignore_loglevel)
		return false;

	return level >= console_loglevel;
}

知道了基本缓冲区操作后，我们来到重点console驱动，也是核心

		call_console_drivers(ext_text, ext_len, text, len);

可以看到，下面函数其实就是上一节注册的console_drivers链表中依次遍历，确认这个console使能，有写函数，则打印这个log信息。


/*
 * call_console_drivers - hand one formatted message to every usable console
 * @ext_text: message in extended format, for CON_EXTENDED consoles
 * @ext_len:  length of @ext_text
 * @text:     message in regular format, for all other consoles
 * @len:      length of @text
 *
 * The console_lock must be held.
 */
static void call_console_drivers(const char *ext_text, size_t ext_len,
				 const char *text, size_t len)
{
	struct console *con;

	trace_console_rcuidle(text, len);

	if (!console_drivers)
		return;

	for_each_console(con) {
		const char *payload;
		size_t payload_len;

		/* While an exclusive console is set, it is the only one served. */
		if (exclusive_console && con != exclusive_console)
			continue;

		/* The console must be enabled and must provide a write hook. */
		if (!(con->flags & CON_ENABLED))
			continue;
		if (!con->write)
			continue;

		/* On an offline CPU only CON_ANYTIME consoles are served. */
		if (!cpu_online(smp_processor_id()) &&
		    !(con->flags & CON_ANYTIME))
			continue;

		payload = (con->flags & CON_EXTENDED) ? ext_text : text;
		payload_len = (con->flags & CON_EXTENDED) ? ext_len : len;
		con->write(con, payload, payload_len);
	}
}

下面看一下我们上节注册的串口console

/*
 * Console descriptor for the S3C24xx serial port. call_console_drivers()
 * delivers messages through the .write hook.
 *
 * CON_ENABLED is not part of the static flags, and call_console_drivers()
 * skips consoles without it. NOTE(review): presumably it is set when the
 * console is registered - confirm.
 */
static struct console s3c24xx_serial_console = {
	.name		= S3C24XX_SERIAL_NAME,
	.device		= uart_console_device,
	.flags		= CON_PRINTBUFFER,
	/* NOTE(review): -1 presumably means "no fixed port index" - confirm */
	.index		= -1,
	.write		= s3c24xx_serial_console_write,
	.setup		= s3c24xx_serial_console_setup,
	.data		= &s3c24xx_uart_drv,
};


/*
 * s3c24xx_serial_console_write - console write hook for the S3C24xx UART
 * @co:    console being written to (not used here)
 * @s:     characters to send
 * @count: number of characters in @s
 *
 * Output always goes to cons_uart. Nothing is sent unless the UCON register
 * shows the port as configured.
 */
static void
s3c24xx_serial_console_write(struct console *co, const char *s,
			     unsigned int count)
{
	unsigned int ucon = rd_regl(cons_uart, S3C2410_UCON);

	/* transmitting on an unconfigured port is not possible */
	if (s3c24xx_port_configured(ucon))
		uart_console_write(cons_uart, s, count,
				   s3c24xx_serial_console_putchar);
}



/**
 *	uart_console_write - send a console message through a serial port
 *	@port: port the message is written to
 *	@s: characters to send
 *	@count: number of characters to take from @s
 *	@putchar: callback that writes one character to @port
 *
 *	Every '\n' is preceded by a '\r' on the wire.
 */
void uart_console_write(struct uart_port *port, const char *s,
			unsigned int count,
			void (*putchar)(struct uart_port *, int))
{
	while (count-- > 0) {
		char c = *s++;

		if (c == '\n')
			putchar(port, '\r');
		putchar(port, c);
	}
}

/*
 * s3c24xx_serial_console_putchar - send one character by polling
 * @port: UART to write to
 * @ch:   character to send
 *
 * Spins, calling cpu_relax(), until s3c24xx_serial_console_txrdy() reports
 * the transmitter ready, then writes @ch to the UTXH register. UFCON is
 * read once, before the wait loop.
 */
static void
s3c24xx_serial_console_putchar(struct uart_port *port, int ch)
{
	unsigned int ufcon = rd_regl(port, S3C2410_UFCON);

	for (;;) {
		if (s3c24xx_serial_console_txrdy(port, ufcon))
			break;
		cpu_relax();
	}

	wr_regb(port, S3C2410_UTXH, ch);
}

这个字符串写操作是最简单的串口发送函数，没有使用中断，而是直接查询不忙就发送。

从零开始之驱动开发、linux驱动（六十六、内核调试篇--printk原理）

猜你喜欢