Understand how to write some useful kernel code
Describe the features of a simple program
Go through each change necessary to rewrite the program as a kernel module
Review the final product of this process
A string with length
main
struct example
{
char *message;
size_t size;
};
Careful with memory
In userspace commonly not a material issue
Can check with valgrind
More serious in kernel
main() {
handle = allocate("data")
print(get(handle))
set(handle, "new_data")
print(get(handle))
deallocate(handle)
}
Using <string.h>
Is goto considered "harmful"?
Benefits:
Less repetitive
Less indentation
As it is written: "if you need more than 3 levels of indentation, you're screwed anyway, and should fix your program".
Pseudocode from user code:
if unable to allocate x:
goto no_need_to_free;
if unable to allocate more:
goto need_to_free;
success (fallthrough):
return x
need_to_free:
free(x)
x = NULL
no_need_to_free:
return x;
Note:
Using err()
from <err.h>
err(3)
= perror(3)
+ exit(3)
errno
clobbered by each system call
temperrno
used to get around this
Using standard output:
$ ./user_example
hello
goodbye
What works differently in the kernel?
No C library in the kernel
-#include <err.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/slab.h>
Basic module building blocks
module_{init,exit}()
macros defined
MODULE_LICENSE()
macro defined
strlen()
has same name
kstrdup()
is equivalent of strdup()
Other functions, like kstrdup(),
take on new names and new arguments.
kmalloc()
and kfree()
struct example
{
char *message;
size_t size;
};
Kernel provides: u8
, u16
, ...
Dunder (e.g. __u8
) versions provided to userspace
Compare to userspace <stdint.h>
Example: include/asm-generic/int-ll64.h
Some common types have typedefs
e.g. pid_t
hides int
Problems?
Ambiguous, e.g. how to printf?
Just show me the type!
struct task_struct
kmalloc()
takes an extra argument?
- struct example *ex = malloc(sizeof *ex);
+ struct example *ex = kmalloc(sizeof *ex, GFP_KERNEL);
if(!ex)
goto out;
kstrdup()
takes the same argument?
ex->size = strlen(msg);
- ex->message = strdup(msg);
+ ex->message = kstrdup(msg, GFP_KERNEL);
if(!ex->message)
goto out_free;
kfree()
works as expected
return ex;
out_free:
- free(ex);
+ kfree(ex);
ex = NULL;
out:
return ex;
kfree()
works as expected 2
@@ -30,17 +27,17 @@ out:
static void example_destroy(struct example *ex)
{
- free(ex->message);
- free(ex);
+ kfree(ex->message);
+ kfree(ex);
}
static bool example_update_message(struct example *ex, const char *msg)
{
size_t size = strlen(msg);
- char *data = strdup(msg);
+ char *data = kstrdup(msg, GFP_KERNEL);
if(!data)
return false;
- free(ex->message);
+ kfree(ex->message);
ex->message = data;
ex->size = size;
return true;
From include/linux/slab.h
static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
For tracking memory allocations
Security benefits e.g. KASAN: Kernel Address Sanitizer
Profiling and performance enhancements
void *kmalloc(size_t size, gfp_t flags)
gfp_t
?
From include/linux/slab.h
* Below is a brief outline of the most useful GFP flags
*
* %GFP_KERNEL
* Allocate normal kernel ram. May sleep.
*
* %GFP_NOWAIT
* Allocation will not sleep.
*
* %GFP_ATOMIC
* Allocation will not sleep. May use emergency pools.
*
typedef unsigned int __bitwise gfp_t;
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
* %__GFP_ZERO
* Zero the allocated memory before returning. Also see kzalloc().
*
* %__GFP_HIGH
* This allocation has high priority and may use emergency pools.
*
* %__GFP_NOFAIL
* Indicate that this allocation is in no way allowed to fail
* (think twice before using).
*
* %__GFP_NORETRY
* If memory is not immediately available,
* then give up at once.
*
* %__GFP_NOWARN
* If allocation fails, don't issue any warnings.
*
* %__GFP_RETRY_MAYFAIL
* Try really hard to succeed the allocation but fail
* eventually.
Section 6.4.2
of the C standard says:
"All identifiers that begin with a double underscore (__) or begin with an underscore (_) followed by an uppercase letter are reserved for any use, except those identifiers which are lexically identical to keywords"
😳
foo(); // may be EXPORT_SYMBOL'd as part of an API
_foo(); // helper function called by foo()
__foo(); // more internal guts of _foo()
___foo(); // maybe going a little too far
________foo(); // HERE BE DRAGONS
Process context: usually GFP_KERNEL
Interrupt context: GFP_ATOMIC
-int main(void)
+int example_init(void)
Is this change necessary?
+ int ret = -ENOMEM;
We don't check for errors, we generate the errors
Concept | Userspace representation | Kernelspace representation |
---|---|---|
Files | int fd | struct file |
Errors | library-provided errno | return -ESOMERR |
Default to -ENOMEM
We can't fail in any other way
Error value frequently defaults to 0
(success)
Generate with: void * ERR_PTR(long err)
Check with: bool IS_ERR(const void *ptr)
+ const char *msg;
struct example *ex = example_create("hello");
+ msg = KERN_ERR "unable to allocate memory";
What can appear right next to a string literal?
Section 6.4.5 of the standard requires it
The kernel source code compiles
Adjacent string literals are concatenated
Nothing else is valid C in our context
Therefore, KERN_ERR
resolves to a string literal
From Section 6.4.4.4 of the C standard
Octal escape sequences are allowed
e.g. \024
, \21
, \0
digits in base 8
From: include/linux/kernel_levels.h
#define KERN_SOH "\001" /* ASCII Start Of Header */
#define KERN_SOH_ASCII '\001'
#define KERN_EMERG KERN_SOH "0" /* system is unusable */
#define KERN_ALERT KERN_SOH "1" /* action must be taken immediately */
#define KERN_CRIT KERN_SOH "2" /* critical conditions */
#define KERN_ERR KERN_SOH "3" /* error conditions */
#define KERN_WARNING KERN_SOH "4" /* warning conditions */
#define KERN_NOTICE KERN_SOH "5" /* normal but significant condition */
#define KERN_INFO KERN_SOH "6" /* informational */
#define KERN_DEBUG KERN_SOH "7" /* debug-level messages */
#define KERN_DEFAULT "" /* the default kernel loglevel */
Relatively well-documented
printk(KERN_ERR "foobar")
pr_*
macros described in the printk documentation
Less room for error
Can re-define pr_fmt()
for custom wrappers
goto
if(!ex)
- err(1, "unable to allocate memory");
- printf("%s\n", example_get_message(ex));
- if(!example_update_message(ex, "goodbye")) {
- int temperrno = errno;
- example_destroy(ex);
- errno = temperrno;
- err(1, "unable to update");
- }
- printf("%s\n", example_get_message(ex));
+ goto out;
pr_info
Print to the kernel ring buffer instead of standard out
+
+ pr_info("%s\n", example_get_message(ex));
+
+ msg = KERN_ERR "unable to update\n";
+ if(!example_update_message(ex, "goodbye"))
+ goto out_free;
+
+ pr_info("%s\n", example_get_message(ex));
+ ret = 0;
+ msg = NULL;
+out_free:
example_destroy(ex);
- return 0;
+out:
+ if(msg)
+ printk(msg);
+ return ret;
+}
Why do we need this?
+void example_exit(void)
+{
}
+module_init(example_init);
+module_exit(example_exit);
What are these?
<module_name>.mod.c
__visible struct module __this_module
__section(".gnu.linkonce.this_module") = {
.name = KBUILD_MODNAME,
.init = init_module,
#ifdef CONFIG_MODULE_UNLOAD
.exit = cleanup_module,
#endif
.arch = MODULE_ARCH_INIT,
};
init()
member type is int (*)(void)
exit()
member type is void (*)(void)
module_init()
defined here
__inittest()
used for type checking
The alias function attribute binds init_module
to our chosen identifier.
The cleanup_module
function is similarly generated
+MODULE_LICENSE("GPL");
Build fails without this
Use kmalloc()/kfree()
to call kernel memory allocator
Additional argument depending on execution context
Be very careful with memory in the kernel!
Entry point for load-time setup specified by module_init()
Entry point for unload-time cleanup specified by module_exit()
Don't be afraid of a little goto
action
Return negative errno values
Print informational messages to kernel ring buffer
dmesg
command
[33684.303081] hello
[33684.303277] goodbye
Further reading: https://kdlp.underground.software/articles/module_translation
msg = (silence)
whoami = None
singularity v0.6-56-g8e52bc8 https://github.com/underground-software/singularity