上一节我们已经看到了console的注册,printk的基础就是console,所以本节我们主要看内核中printk的实现。
/**
 * printk - emit a kernel log message
 * @fmt: printf-style format string; the values it consumes follow it
 *
 * Callable from any context. The variable arguments are gathered into a
 * va_list and handed to vprintk_func(), which does the real work.
 *
 * The message is always recorded in the log buffer. If the console lock
 * can be taken, the console drivers are invoked right away; otherwise the
 * current holder of console_sem picks the new output up in
 * console_unlock() and sends it to the consoles before dropping the lock.
 *
 * Because console output may therefore be deferred, and console_loglevel
 * is only examined when the text is actually printed, code that calls
 * printk() and then changes console_loglevel may not get what it expects.
 *
 * See printf(3), and the vsnprintf() documentation for the format
 * extensions beyond C99.
 *
 * Return: the value returned by vprintk_func().
 */
asmlinkage __visible int printk(const char *fmt, ...)
{
	va_list ap;
	int printed;

	va_start(ap, fmt);
	printed = vprintk_func(fmt, ap);
	va_end(ap);

	return printed;
}
通过printk的注释我们可以看到下面几个特点:
- 它可以从任何上下文中调用。
- 我们尝试获取console_lock。 如果我们成功,那很容易 - 我们记录了输出并调用控制台驱动程序。
- 如果我们无法获得信号量,我们将输出放入日志缓冲区并返回。
- console_sem的当前持有者会在console_unlock()中注意到新的输出,并在释放锁之前将其发送到控制台。
主要实现如下:
/*
 * vprintk_func - pick the storage path for a message according to the
 * per-CPU printk_context flags.
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
 *
 * The checks are made in this order:
 *   1. NMI-direct context and logbuf_lock can be taken without spinning:
 *      store into the main log buffer and defer the console output.
 *   2. NMI context: vprintk_nmi().
 *   3. printk-safe context: vprintk_safe().
 *   4. Otherwise: vprintk_default().
 *
 * Returns the value produced by whichever path handled the message.
 */
__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
/*
* Try to use the main logbuf even in NMI. But avoid calling console
* drivers that might have their own locks.
*/
if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
raw_spin_trylock(&logbuf_lock)) {
int len;
/* Only store the text here; console drivers are not called on this path. */
len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
raw_spin_unlock(&logbuf_lock);
defer_console_output();
return len;
}
/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
return vprintk_nmi(fmt, args);
/* Use extra buffer to prevent a recursion deadlock in safe mode. */
if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
return vprintk_safe(fmt, args);
/* No obstacles. */
return vprintk_default(fmt, args);
}
可以看到,这个实现中判断了各种上下文。就像注释中说的那样:能够获取console_lock,表示现在可以直接打印;如果不能获取,那就先放入缓冲区,等当前持有者调用console_unlock的时候再输出缓冲区的内容。
这里我们以最常见的默认情况来分析。
return vprintk_default(fmt, args);
/*
 * vprintk_default - the ordinary printk path.
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
 *
 * With CONFIG_KGDB_KDB, the message is handed to kdb when kdb is trapping
 * printk and no CPU is currently inside kdb_printf. In every other case it
 * goes to vprintk_emit() with facility 0 and the default log level.
 *
 * Returns the result of whichever function consumed the message.
 */
int vprintk_default(const char *fmt, va_list args)
{
#ifdef CONFIG_KGDB_KDB
	/* Route printk() to kdb, but never recurse from within kdb_printf. */
	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
		return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
#endif

	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
}
说到这里我们要加入一个东西,就是内核中printk有打印等级
/*
 * integer equivalents of KERN_<LEVEL>
 *
 * 0..7 are the real severities (0 is the most severe). The two negative
 * values are placeholders that are resolved before a record is stored:
 * vprintk_emit() turns LOGLEVEL_SCHED into LOGLEVEL_DEFAULT, and
 * vprintk_store() replaces LOGLEVEL_DEFAULT with the level parsed from the
 * message prefix or with default_message_loglevel.
 */
#define LOGLEVEL_SCHED -2 /* Deferred messages from sched code
* are set to this special level */
#define LOGLEVEL_DEFAULT -1 /* default (or last) loglevel */
#define LOGLEVEL_EMERG 0 /* system is unusable */
#define LOGLEVEL_ALERT 1 /* action must be taken immediately */
#define LOGLEVEL_CRIT 2 /* critical conditions */
#define LOGLEVEL_ERR 3 /* error conditions */
#define LOGLEVEL_WARNING 4 /* warning conditions */
#define LOGLEVEL_NOTICE 5 /* normal but significant condition */
#define LOGLEVEL_INFO 6 /* informational */
#define LOGLEVEL_DEBUG 7 /* debug-level messages */
使用时我们通常使用下面这几个
/*
 * String prefixes placed in front of a printk() format string through
 * literal concatenation, e.g. printk(KERN_INFO "text\n").
 * Each one is KERN_SOH followed by a single character. vprintk_store()
 * decodes '0'..'7' into the matching numeric level and treats 'd' as
 * "use the default level".
 */
#define KERN_EMERG KERN_SOH "0" /* system is unusable */
#define KERN_ALERT KERN_SOH "1" /* action must be taken immediately */
#define KERN_CRIT KERN_SOH "2" /* critical conditions */
#define KERN_ERR KERN_SOH "3" /* error conditions */
#define KERN_WARNING KERN_SOH "4" /* warning conditions */
#define KERN_NOTICE KERN_SOH "5" /* normal but significant condition */
#define KERN_INFO KERN_SOH "6" /* informational */
#define KERN_DEBUG KERN_SOH "7" /* debug-level messages */
#define KERN_DEFAULT KERN_SOH "d" /* the default kernel loglevel */
printk(KERN_INFO "Serial: 21285 driver\n");
printk(KERN_ERR "CRC mismatch\n");
下面就是输出函数
/*
 * vprintk_emit - store a message in the log buffer and, when allowed,
 * push pending records to the consoles.
 * @facility: facility number stored with the record
 * @level:    LOGLEVEL_* value; LOGLEVEL_SCHED means "called from the
 *            scheduler, do not touch the console semaphore here"
 * @dict:     optional dictionary buffer, may be NULL
 * @dictlen:  length of @dict
 * @fmt:      printf-style format string
 * @args:     arguments for @fmt
 *
 * Returns the value returned by vprintk_store().
 */
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
int printed_len;
bool in_sched = false;
unsigned long flags;
/*
* Scheduler-originated messages are stored with the default level;
* remember the origin so the console is not driven from here.
*/
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
in_sched = true;
}
/*
* Optional delays before the message is stored.
* NOTE(review): presumably there to pace output on slow consoles;
* the helpers are not visible here - confirm.
*/
boot_delay_msec(level);
printk_delay();
/* This stops the holder of console_sem just where we want him */
logbuf_lock_irqsave(flags);
/*
* Format the message and store it in the log buffer
* (vprintk_store() must run under logbuf_lock).
*/
printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
logbuf_unlock_irqrestore(flags);
/* If called from the scheduler, we can not call up(). */
/* Outside the scheduler the consoles can be driven right now; otherwise the output is left for later. */
if (!in_sched) {
/*
* Disable preemption to avoid being preempted while holding
* console_sem which would prevent anyone from printing to
* console
*/
preempt_disable();
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
* /dev/kmsg and syslog() users.
*/
if (console_trylock_spinning())
console_unlock();
preempt_enable();
}
wake_up_klogd();
return printed_len;
}
上面函数我们关注两个点
printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
console_unlock();
第一点
/* Room reserved for the prefix added in front of a line when it is printed. */
#define PREFIX_MAX 32
/* Maximum formatted text length of one message: 1024 - 32 = 992 bytes. */
#define LOG_LINE_MAX (1024 - PREFIX_MAX)
/*
 * vprintk_store - format a message, decode its KERN_<LEVEL> prefix and
 * hand the text to log_output().
 * @facility: facility number; the level prefix is only parsed when this is 0
 * @level:    LOGLEVEL_* value, LOGLEVEL_DEFAULT if the caller has none
 * @dict:     optional dictionary buffer, may be NULL
 * @dictlen:  length of @dict
 * @fmt:      printf-style format string
 * @args:     arguments for @fmt
 *
 * Must be called under logbuf_lock. The text is formatted into a static
 * buffer that is shared by all callers.
 *
 * Returns the value returned by log_output().
 */
int vprintk_store(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len;
enum log_flags lflags = 0;
/*
* The printf needs to come first; we need the syslog
* prefix which might be passed-in as a parameter.
*/
/* Format the whole message (prefix included) into textbuf. */
text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
/* mark and strip a trailing newline */
if (text_len && text[text_len-1] == '\n') {
text_len--;
lflags |= LOG_NEWLINE;
}
/* strip kernel syslog prefix and extract log level or control flags */
if (facility == 0) {
int kern_level;
while ((kern_level = printk_get_level(text)) != 0) {
switch (kern_level) {
case '0' ... '7':
if (level == LOGLEVEL_DEFAULT) /* convert the level character to its number */
level = kern_level - '0';
/* fallthrough */
case 'd': /* KERN_DEFAULT */
lflags |= LOG_PREFIX; /* reached for '0'..'7' (fallthrough) and for 'd' */
break;
case 'c': /* KERN_CONT */ /* continuation-line marker */
lflags |= LOG_CONT;
}
/* Skip the two-byte prefix: KERN_SOH plus the level character. */
text_len -= 2;
text += 2;
}
}
if (level == LOGLEVEL_DEFAULT)
level = default_message_loglevel; /* no explicit level: use the default message level */
if (dict)
lflags |= LOG_PREFIX|LOG_NEWLINE; /* a dictionary forces both flags */
return log_output(facility, level, lflags,
dict, dictlen, text, text_len);
}
内核定义了LOG_LINE_MAX(1024 - 32 = 992)字节的静态缓冲区textbuf,用来保存printk格式化后的字符。
/*
 * log_output - route one formatted message either into the continuation
 * buffer (cont) or into the record log.
 *
 * Returns text_len when the text was appended to the continuation buffer,
 * 0 for an empty continuation line, otherwise the result of log_store().
 */
static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
{
/*
* If an earlier line was buffered, and we're a continuation
* write from the same process, try to add it to the buffer.
*/
if (cont.len) {
if (cont.owner == current && (lflags & LOG_CONT)) {
if (cont_add(facility, level, lflags, text, text_len))
return text_len;
}
/* Otherwise, make sure it's flushed */
cont_flush();
}
/* Skip empty continuation lines that couldn't be added - they just flush */
if (!text_len && (lflags & LOG_CONT))
return 0;
/* If it doesn't end in a newline, try to buffer the current line */
if (!(lflags & LOG_NEWLINE)) {
if (cont_add(facility, level, lflags, text, text_len))
return text_len;
}
/* Store it in the record log */
return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
}
上面主要是对标志位和续行缓冲区cont的处理:如果cont中已有缓存内容,且本次是同一进程的续写(LOG_CONT),就尝试追加到cont中;否则先把cont刷新出去。没有以换行结尾的消息会先尝试缓存到cont,其余情况则直接通过log_store写入日志缓冲区。
刷新函数用的是log_store,普通写也是这个,就参数不一样而已。
/*
 * cont_flush - write the buffered continuation line, if there is one,
 * to the record log and mark the continuation buffer empty.
 *
 * The record is stored without a dictionary and keeps the facility,
 * level, flags and timestamp saved in cont.
 */
static void cont_flush(void)
{
	if (cont.len != 0) {
		log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec,
			  NULL, 0, cont.buf, cont.len);
		cont.len = 0;
	}
}
/* insert record into the buffer, discard old ones, update heads */
/* 将记录插入缓冲区,丢弃旧记录,更新头部索引 */
/*
 * log_store - append one record to the log buffer.
 * @facility: facility number stored in the record
 * @level:    log level; only the low three bits are stored
 * @flags:    log flags; only the low five bits are stored
 * @ts_nsec:  timestamp for the record; 0 means "use local_clock()"
 * @dict:     dictionary bytes copied after the text
 * @dict_len: length of @dict
 * @text:     message text
 * @text_len: length of @text
 *
 * Returns the text length stored in the record (including the truncation
 * marker when one was appended), or 0 if no room could be made.
 */
static int log_store(int facility, int level,
enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
struct printk_log *msg;
u32 size, pad_len;
u16 trunc_msg_len = 0;
/* number of '\0' padding bytes to next message */
size = msg_used_size(text_len, dict_len, &pad_len);
if (log_make_free_space(size)) {
/* truncate the message if it is too long for empty buffer */
size = truncate_msg(&text_len, &trunc_msg_len,
&dict_len, &pad_len);
/* survive when the log buffer is too small for trunc_msg */
if (log_make_free_space(size))
return 0;
}
if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
/*
* This message + an additional empty header does not fit
* at the end of the buffer. Add an empty header with len == 0
* to signify a wrap around.
*/
memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
log_next_idx = 0;
}
/* fill message */
msg = (struct printk_log *)(log_buf + log_next_idx);
memcpy(log_text(msg), text, text_len);
msg->text_len = text_len;
if (trunc_msg_len) {
/* Append the truncation marker right behind the (shortened) text. */
memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
msg->text_len += trunc_msg_len;
}
/* log_dict() depends on msg->text_len, which is final at this point. */
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
msg->facility = facility;
msg->level = level & 7;
msg->flags = flags & 0x1f;
if (ts_nsec > 0)
msg->ts_nsec = ts_nsec;
else
msg->ts_nsec = local_clock();
/* Zero the padding that follows the dictionary. */
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = size;
/* insert message */
log_next_idx += msg->len;
log_next_seq++;
return msg->text_len;
}
上面这个函数主要就是一些数据拷贝和标记位置的移动。
现在所有的log信息都在数组缓冲区中。
接下来就看另一个,unlock了。
/**
* console_unlock - unlock the console system
*
* Releases the console_lock which the caller holds on the console system
* and the console driver list.
*
* While the console_lock was held, console output may have been buffered
* by printk(). If this is the case, console_unlock(); emits
* the output prior to releasing the lock.
*
* If there is output waiting, we wake /dev/kmsg and syslog() users.
*
* console_unlock(); may be called from any context.
*/
void console_unlock(void)
{
/* Static scratch buffers: one for extended-format output, one for plain text. */
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
unsigned long flags;
bool do_cond_resched, retry;
/* While the console is suspended only the semaphore is released; nothing is printed. */
if (console_suspended) {
up_console_sem();
return;
}
/*
* Console drivers are called with interrupts disabled, so
* @console_may_schedule should be cleared before; however, we may
* end up dumping a lot of lines, for example, if called from
* console registration path, and should invoke cond_resched()
* between lines if allowable. Not doing so can cause a very long
* scheduling stall on a slow console leading to RCU stall and
* softlockup warnings which exacerbate the issue with more
* messages practically incapacitating the system.
*
* console_trylock() is not able to detect the preemptive
* context reliably. Therefore the value must be stored before
* and cleared after the "again" goto label.
*/
do_cond_resched = console_may_schedule;
again:
console_may_schedule = 0;
/*
* We released the console_sem lock, so we need to recheck if
* cpu is online and (if not) is there at least one CON_ANYTIME
* console.
*/
if (!can_use_console()) {
console_locked = 0;
up_console_sem();
return;
}
/* Print one record per iteration until the console cursor catches up with the writer. */
for (;;) {
struct printk_log *msg;
size_t ext_len = 0;
size_t len;
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
if (console_seq < log_first_seq) {
len = sprintf(text, "** %u printk messages dropped **\n",
(unsigned)(log_first_seq - console_seq));
/* messages are gone, move to first one */
console_seq = log_first_seq;
console_idx = log_first_idx;
} else {
len = 0;
}
skip:
/*
* Nothing left to print. Note that this leaves the loop with
* logbuf_lock held and printk-safe mode entered; both are undone
* after the loop.
*/
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx);
if (suppress_message_printing(msg->level)) {
/*
* Skip record we have buffered and already printed
* directly to the console when we received it, and
* record that has level above the console loglevel.
*/
console_idx = log_next(console_idx);
console_seq++;
goto skip;
}
/* Render the record after any "messages dropped" notice already in text. */
len += msg_print_text(msg,
console_msg_format & MSG_FORMAT_SYSLOG,
text + len,
sizeof(text) - len);
/* The extended format is only built when at least one extended console is registered. */
if (nr_ext_console_drivers) {
ext_len = msg_print_ext_header(ext_text,
sizeof(ext_text),
msg, console_seq);
ext_len += msg_print_ext_body(ext_text + ext_len,
sizeof(ext_text) - ext_len,
log_dict(msg), msg->dict_len,
log_text(msg), msg->text_len);
}
console_idx = log_next(console_idx);
console_seq++;
raw_spin_unlock(&logbuf_lock);
/*
* While actively printing out messages, if another printk()
* were to occur on another CPU, it may wait for this one to
* finish. This task can not be preempted if there is a
* waiter waiting to take over.
*/
console_lock_spinning_enable();
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(ext_text, ext_len, text, len);
start_critical_timings();
/* A true return ends this call here, without releasing console_sem. */
if (console_lock_spinning_disable_and_check()) {
printk_safe_exit_irqrestore(flags);
return;
}
printk_safe_exit_irqrestore(flags);
if (do_cond_resched)
cond_resched();
}
console_locked = 0;
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
exclusive_console = NULL;
/* Drop the logbuf_lock taken in the final loop iteration. */
raw_spin_unlock(&logbuf_lock);
up_console_sem();
/*
* Someone could have filled up the buffer again, so re-check if there's
* something to flush. In case we cannot trylock the console_sem again,
* there's a new owner and the console_unlock() from them will do the
* flush, no worries.
*/
raw_spin_lock(&logbuf_lock);
retry = console_seq != log_next_seq;
raw_spin_unlock(&logbuf_lock);
/* Matches the printk_safe_enter_irqsave() of the final loop iteration. */
printk_safe_exit_irqrestore(flags);
if (retry && console_trylock())
goto again;
}
这个函数我们不仔细分析,看主要部分内容。
定义了很大的缓冲区。
#define CONSOLE_EXT_LOG_MAX 8192
#define PREFIX_MAX 32
#define LOG_LINE_MAX (1024 - PREFIX_MAX)
void console_unlock(void)
{
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
......
}
下面这些变量用来标记log记录的位置:syslog下一条要读取的记录、缓冲区中第一条记录的索引和序列号、下一条要写入记录的索引和序列号、console下一条要输出的记录,以及最近一次clear之后要读取的记录。
/*
 * Cursors into the log buffer. Every position is kept as a pair: a 64-bit
 * sequence number (*_seq) and a 32-bit byte offset into log_buf (*_idx).
 */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
static size_t syslog_partial;
/* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
static u32 log_first_idx;
/* index and sequence number of the next record to store in the buffer */
static u64 log_next_seq;
static u32 log_next_idx;
/* the next printk record to write to the console */
static u64 console_seq;
static u32 console_idx;
/* the next printk record to read after the last 'clear' command */
static u64 clear_seq;
static u32 clear_idx;
最大的log缓冲区,以及这个缓冲区的一些基本操作
/* Size in bytes of the built-in log buffer: 2^CONFIG_LOG_BUF_SHIFT. */
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* Statically allocated record buffer. */
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
/*
 * Buffer currently in use and its length. They start out pointing at the
 * static buffer; all accessors go through these two variables and never
 * touch __log_buf directly.
 */
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
/* Return log buffer address */
char *log_buf_addr_get(void)
{
return log_buf;
}
/* Return log buffer size */
u32 log_buf_len_get(void)
{
return log_buf_len;
}
/* human readable text of the record */
static char *log_text(const struct printk_log *msg)
{
return (char *)msg + sizeof(struct printk_log);
}
/*
 * log_dict - locate the optional key/value dictionary of a record.
 *
 * The dictionary follows the text, i.e. it starts msg->text_len bytes
 * after the end of the struct printk_log header.
 */
static char *log_dict(const struct printk_log *msg)
{
	char *text_start = (char *)msg + sizeof(struct printk_log);

	return text_start + msg->text_len;
}
/*
 * log_from_idx - get the record at byte offset @idx of the log buffer.
 *
 * @idx must point to a valid record. A record whose len is 0 is the
 * end-of-buffer marker; in that case the record at the very start of the
 * buffer is returned instead.
 */
static struct printk_log *log_from_idx(u32 idx)
{
	struct printk_log *rec = (struct printk_log *)(log_buf + idx);

	return rec->len ? rec : (struct printk_log *)log_buf;
}
/*
 * log_next - get the byte offset of the record following the one at @idx.
 *
 * @idx must point to a valid record. If it points at the end-of-buffer
 * marker (len == 0), the record at the start of the buffer is taken to be
 * the current one, so the result is the offset right behind that first
 * record.
 */
static u32 log_next(u32 idx)
{
	struct printk_log *rec = (struct printk_log *)(log_buf + idx);

	if (rec->len)
		return idx + rec->len;

	/* Wrap marker: the current record is really the one at offset 0. */
	rec = (struct printk_log *)log_buf;
	return rec->len;
}
知道上面的操作后,我们看下面,打印等级是怎么处理的。
skip:
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx); //取出一个打印
if (suppress_message_printing(msg->level)) { //判断打印等级
/*
* Skip record we have buffered and already printed
* directly to the console when we received it, and
* record that has level above the console loglevel.
*/
/* 等级比设置的等级低,那么就跳到下一个log信息,这个低等级的忽略掉 */
console_idx = log_next(console_idx);
console_seq++;
goto skip;
}
/*
 * suppress_message_printing - decide whether a record is kept off the
 * console.
 * @level: numeric log level of the record
 *
 * Returns true when @level is numerically greater than or equal to
 * console_loglevel, unless ignore_loglevel is set, in which case nothing
 * is suppressed.
 */
static bool suppress_message_printing(int level)
{
	if (ignore_loglevel)
		return false;

	return level >= console_loglevel;
}
知道了基本缓冲区操作后,我们来到重点:console驱动,也是核心
call_console_drivers(ext_text, ext_len, text, len);
可以看到,下面的函数其实就是依次遍历上一节注册的console_drivers链表,确认这个console已使能、有写函数,然后打印这个log信息。
/*
 * call_console_drivers - hand one rendered message to every eligible
 * registered console.
 * @ext_text: message in extended format, for CON_EXTENDED consoles
 * @ext_len:  length of @ext_text
 * @text:     message in plain format, for all other consoles
 * @len:      length of @text
 *
 * The console_lock must be held.
 *
 * A console is skipped when an exclusive console is set and it is not
 * that one, when it is not enabled, when it has no write callback, or
 * when the current CPU is offline and the console lacks CON_ANYTIME.
 */
static void call_console_drivers(const char *ext_text, size_t ext_len,
				 const char *text, size_t len)
{
	struct console *c;

	trace_console_rcuidle(text, len);

	if (console_drivers == NULL)
		return;

	for_each_console(c) {
		/* The tests are evaluated in exactly this order. */
		if ((exclusive_console && c != exclusive_console) ||
		    !(c->flags & CON_ENABLED) ||
		    !c->write ||
		    (!cpu_online(smp_processor_id()) &&
		     !(c->flags & CON_ANYTIME)))
			continue;

		if (c->flags & CON_EXTENDED)
			c->write(c, ext_text, ext_len);
		else
			c->write(c, text, len);
	}
}
下面看一下我们上节注册的串口console
/*
 * Console descriptor for the S3C24xx serial port.
 *
 * call_console_drivers() reaches this driver through the .write hook.
 * CON_ENABLED is not set in .flags here, and call_console_drivers() skips
 * consoles without it, so it has to be set elsewhere (not visible in this
 * excerpt) before any output arrives.
 */
static struct console s3c24xx_serial_console = {
.name = S3C24XX_SERIAL_NAME,
.device = uart_console_device,
/* NOTE(review): presumably requests a replay of already buffered messages on registration - confirm. */
.flags = CON_PRINTBUFFER,
/* NOTE(review): -1 looks like "no fixed port index"; confirm against the console registration code. */
.index = -1,
/* Output callback invoked as con->write(con, text, len). */
.write = s3c24xx_serial_console_write,
.setup = s3c24xx_serial_console_setup,
.data = &s3c24xx_uart_drv,
};
/*
 * s3c24xx_serial_console_write - console write callback.
 * @co:    console being written to (not used here)
 * @s:     characters to send
 * @count: number of characters in @s
 *
 * Reads the UCON register of the console port and, if it shows the port
 * as configured, sends the characters one at a time through
 * uart_console_write(). On an unconfigured port the text is dropped.
 */
static void
s3c24xx_serial_console_write(struct console *co, const char *s,
			     unsigned int count)
{
	unsigned int ctrl = rd_regl(cons_uart, S3C2410_UCON);

	/* not possible to xmit on unconfigured port */
	if (s3c24xx_port_configured(ctrl))
		uart_console_write(cons_uart, s, count,
				   s3c24xx_serial_console_putchar);
}
/**
 * uart_console_write - send a console message through a serial port
 * @port: port the message is written to
 * @s: characters to send
 * @count: number of characters taken from @s
 * @putchar: callback that writes a single character to @port
 *
 * Each '\n' in the message is preceded by a '\r' on the wire.
 */
void uart_console_write(struct uart_port *port, const char *s,
			unsigned int count,
			void (*putchar)(struct uart_port *, int))
{
	const char *end = s + count;

	while (s != end) {
		if (*s == '\n')
			putchar(port, '\r');
		putchar(port, *s);
		s++;
	}
}
/*
 * s3c24xx_serial_console_putchar - write one character to the port.
 * @port: port to write to
 * @ch:   character to send
 *
 * Reads UFCON once, busy-waits (no interrupts involved) until
 * s3c24xx_serial_console_txrdy() reports the transmitter ready, then
 * writes the character to the UTXH register.
 */
static void
s3c24xx_serial_console_putchar(struct uart_port *port, int ch)
{
	unsigned int fifo_ctrl = rd_regl(port, S3C2410_UFCON);

	for (;;) {
		if (s3c24xx_serial_console_txrdy(port, fifo_ctrl))
			break;
		cpu_relax();
	}

	wr_regb(port, S3C2410_UTXH, ch);
}
这个字符串写操作是最简单的串口发送函数,没有使用中断,而是直接轮询,不忙就发送。