kernel/process/mod.rs

pub mod scheduler;
mod syscalls;

use core::sync::atomic::{AtomicU64, Ordering};

use alloc::{collections::BTreeMap, string::String, vec::Vec};
use kernel_user_link::process::{PriorityLevel, ProcessMetadata};

use crate::{
    cpu::{self, gdt},
    executable::{elf, load_elf_to_vm},
    fs::{
        self,
        path::{Path, PathBuf},
    },
    graphics::vga,
    memory_management::{
        memory_layout::{align_down, align_up, is_aligned, GB, KERNEL_BASE, MB, PAGE_2M, PAGE_4K},
        virtual_memory_mapper::{
            self, VirtualMemoryMapEntry, VirtualMemoryMapper, MAX_USER_VIRTUAL_ADDRESS,
        },
    },
};

static PROCESS_ID_ALLOCATOR: GoingUpAllocator = GoingUpAllocator::new();
// TODO: add dynamic stack allocation
const INITIAL_STACK_SIZE_PAGES: usize = 256; // 1MB

#[allow(clippy::identity_op)]
const HEAP_OFFSET_FROM_ELF_END: usize = 1 * MB;
#[allow(clippy::identity_op)]
const DEFAULT_MAX_HEAP_SIZE: usize = 1 * GB;

#[derive(Debug)]
pub enum ProcessError {
    #[allow(unused)]
    CouldNotLoadElf(fs::FileSystemError),
}

impl From<fs::FileSystemError> for ProcessError {
    fn from(e: fs::FileSystemError) -> Self {
        Self::CouldNotLoadElf(e)
    }
}

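/// A simple monotonically increasing ID allocator; used for process IDs and
/// for file descriptor indices. IDs are never reused.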
struct GoingUpAllocator {
    next_id: AtomicU64,
}

impl GoingUpAllocator {
    const fn new() -> Self {
        Self {
            next_id: AtomicU64::new(0),
        }
    }

    fn allocate(&self) -> u64 {
        self.next_id.fetch_add(1, Ordering::SeqCst)
    }
}

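/// Backing storage for the 512-byte FXSAVE/FXRSTOR area (x87/SSE state),
/// 16-byte aligned as required by the `fxsave` instruction.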
#[repr(C, align(0x10))]
#[derive(Debug, Clone, Copy, Default)]
pub struct FxSave(pub [u128; 32]);

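/// The CPU state saved for a process and restored when switching to it:
/// flags, instruction pointer, segment selectors, debug registers,
/// general-purpose registers, and the FXSAVE area.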
#[repr(C, align(0x10))]
#[derive(Debug, Clone, Default, Copy)]
pub struct ProcessContext {
    pub rflags: u64,
    pub rip: u64,
    pub cs: u64,
    pub ds: u64,
    pub es: u64,
    pub fs: u64,
    pub gs: u64,
    pub ss: u64,
    pub dr0: u64,
    pub dr1: u64,
    pub dr2: u64,
    pub dr3: u64,
    pub dr6: u64,
    pub dr7: u64,
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub fxsave: FxSave,
}

// TODO: implement threads; for now, each process also acts as a thread
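/// A single user process: its address space, saved CPU context, open
/// filesystem nodes, heap and stack bookkeeping, and exit information.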
#[allow(dead_code)]
pub struct Process {
    vm: VirtualMemoryMapper,
    context: ProcessContext,
    id: u64,
    parent_id: u64,

    // use BTreeMap to keep FDs even after closing some of them
    open_filesystem_nodes: BTreeMap<usize, fs::FilesystemNode>,
    file_index_allocator: GoingUpAllocator,

    argv: Vec<String>,
    file_path: PathBuf,

    current_dir: fs::Directory,

    stack_ptr_end: usize,
    stack_size: usize,

    heap_start: usize,
    heap_size: usize,
    heap_max: usize,

    priority: PriorityLevel,

    // split from the state, so that we can keep it as a simple enum
    exit_code: i32,
    children_exits: BTreeMap<u64, i32>,
}

impl Process {
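    /// Allocates a new user process from a loaded ELF:
    /// clones the current kernel mappings into a fresh user address space,
    /// maps the process metadata page and the initial stack, loads the ELF image,
    /// places the heap after it, and prepares the initial `ProcessContext`
    /// so the scheduler can start executing at the ELF entry point.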
    pub fn allocate_process(
        parent_id: u64,
        elf: &elf::Elf,
        file: &mut fs::File,
        argv: Vec<String>,
        current_dir: fs::Directory,
    ) -> Result<Self, ProcessError> {
        let id = PROCESS_ID_ALLOCATOR.allocate();
        let mut vm = virtual_memory_mapper::clone_current_vm_as_user();

        let mut process_meta = ProcessMetadata::empty();
        process_meta.pid = id;
        let process_meta_addr = MAX_USER_VIRTUAL_ADDRESS - PAGE_4K;
        vm.map(&VirtualMemoryMapEntry {
            virtual_address: process_meta_addr,
            physical_address: None,
            size: PAGE_4K,
            flags: virtual_memory_mapper::flags::PTE_USER,
        });
        assert!(core::mem::size_of::<ProcessMetadata>() <= PAGE_4K);

        // subtract one page for stack guard
        let stack_end = process_meta_addr - PAGE_4K;
        let stack_size = INITIAL_STACK_SIZE_PAGES * PAGE_4K;
        let stack_start = stack_end - stack_size;
        vm.map(&VirtualMemoryMapEntry {
            virtual_address: stack_start,
            physical_address: None,
            size: stack_size,
            flags: virtual_memory_mapper::flags::PTE_USER
                | virtual_memory_mapper::flags::PTE_WRITABLE,
        });

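        // start the user stack pointer 8 bytes below the top of the mapped stack region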
        let rsp = stack_end as u64 - 8;
        let (new_rsp, argc, argv_ptr) =
            Self::prepare_stack(&mut vm, &argv, rsp, stack_start as u64);

        // SAFETY: we know that the vm passed is an exact kernel copy of this vm, so it's safe to switch to it
        // TODO: maybe it would be best to create the new vm inside this function?
        let (_min_addr, max_addr) =
            unsafe { load_elf_to_vm(elf, file, &mut process_meta, &mut vm)? };

        Self::write_process_meta(&mut vm, process_meta_addr, process_meta);

        // SAFETY: we know that the vm is never used after this point until scheduling
        unsafe { vm.add_process_specific_mappings() };

        // set it quite a distance from the elf and align it to 2MB pages (we are not using 2MB virtual memory, so it's not related)
        let heap_start = align_up(max_addr + HEAP_OFFSET_FROM_ELF_END, PAGE_2M);
        let heap_size = 0; // start at 0, let user space programs control it
        let heap_max = DEFAULT_MAX_HEAP_SIZE;

        let mut context = ProcessContext::default();
        let entry = elf.entry_point();
        assert!(vm.is_address_mapped(entry as _) && entry < KERNEL_BASE as u64);

        context.rip = entry;
        context.cs = gdt::get_user_code_seg_index().0 | gdt::USER_RING as u64;
        context.ds = gdt::get_user_data_seg_index().0 | gdt::USER_RING as u64;
        context.ss = context.ds;
        context.rflags = cpu::flags::IF;

        // set up the main function arguments and the stack
        context.rsp = new_rsp;
        // NOTE: this is very specific to the x86_64 SysV ABI
        context.rdi = argc;
        context.rsi = argv_ptr;

        Ok(Self {
            vm,
            context,
            id,
            parent_id,
            open_filesystem_nodes: BTreeMap::new(),
            file_index_allocator: GoingUpAllocator::new(),
            argv,
            file_path: file.path().to_path_buf(),
            current_dir,
            stack_ptr_end: stack_end - 8, // 8 bytes for padding
            stack_size,
            heap_start,
            heap_size,
            heap_max,
            priority: PriorityLevel::Normal,
            exit_code: 0,
            children_exits: BTreeMap::new(),
        })
    }

    /// # Safety
    /// Check [`virtual_memory_mapper::VirtualMemoryMapper::switch_to_this`] for more info
    pub unsafe fn switch_to_this_vm(&mut self) {
        self.vm.switch_to_this();
    }

    pub fn id(&self) -> u64 {
        self.id
    }

    #[allow(dead_code)]
    pub fn parent_id(&self) -> u64 {
        self.parent_id
    }

    pub fn is_user_address_mapped(&self, address: usize) -> bool {
        self.vm.is_address_mapped(address)
    }

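    /// Called once stdin/stdout/stderr (fds 0, 1 and 2) have been attached, to
    /// make sure the fd allocator only hands out descriptors starting from 3.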
    pub fn finish_stdio(&mut self) {
        // make sure we have STDIN/STDOUT/STDERR, and the allocator is after them
        assert!(self.open_filesystem_nodes.len() >= 3);
        if self.file_index_allocator.next_id.load(Ordering::Relaxed) < 3 {
            self.file_index_allocator
                .next_id
                .store(3, Ordering::Relaxed);
        }
    }

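    /// Assigns the next free file descriptor to `file` and returns it.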
    pub fn push_fs_node<F: Into<fs::FilesystemNode>>(&mut self, file: F) -> usize {
        let fd = self.file_index_allocator.allocate() as usize;
        assert!(
            self.open_filesystem_nodes.insert(fd, file.into()).is_none(),
            "fd already exists"
        );
        fd
    }

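    /// Attaches `file` to a specific file descriptor. Returns `false` if the fd
    /// is already in use; otherwise bumps the allocator past `fd` and returns `true`.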
    pub fn attach_fs_node_to_fd<F: Into<fs::FilesystemNode>>(
        &mut self,
        fd: usize,
        file: F,
    ) -> bool {
        // fail first
        if self.open_filesystem_nodes.contains_key(&fd) {
            return false;
        }
        // update the allocator so that the next `push_fs_node` will not overwrite this fd
        self.file_index_allocator
            .next_id
            .store(fd as u64 + 1, Ordering::SeqCst);
        // must always return `true`
        self.open_filesystem_nodes.insert(fd, file.into()).is_none()
    }

    pub fn get_fs_node(&mut self, fd: usize) -> Option<&mut fs::FilesystemNode> {
        self.open_filesystem_nodes.get_mut(&fd)
    }

    pub fn take_fs_node(&mut self, fd: usize) -> Option<fs::FilesystemNode> {
        self.open_filesystem_nodes.remove(&fd)
    }

    pub fn put_fs_node(&mut self, fd: usize, file: fs::FilesystemNode) {
        assert!(
            self.open_filesystem_nodes.insert(fd, file).is_none(),
            "fd already exists"
        )
    }

    /// Sets the `exit_code` and prepares to release the resources held by this process.
    /// The scheduler will handle the `state` of the process.
    pub fn exit(&mut self, exit_code: i32) {
        self.exit_code = exit_code;
        // release the vga if we have it
        if let Some(vga) = vga::controller() {
            vga.release(self.id);
        }
    }

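    /// Records the exit code of a child process so it can be collected later.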
    pub fn add_child_exit(&mut self, pid: u64, exit_code: i32) {
        assert!(
            self.children_exits.insert(pid, exit_code).is_none(),
            "child pid already exists"
        );
    }

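    /// Removes and returns the recorded exit code for child `pid`, if any.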
    pub fn get_child_exit(&mut self, pid: u64) -> Option<i32> {
        self.children_exits.remove(&pid)
    }

    /// Grows or shrinks the heap and returns the end of the heap before the change.
    /// For a grow, this is the address of the newly mapped block.
    /// For a shrink, the result is generally not useful.
    /// Call with `0` to get the current heap end.
    pub fn add_to_heap(&mut self, increment: isize) -> Option<usize> {
        if increment == 0 {
            return Some(self.heap_start + self.heap_size);
        }

        assert!(is_aligned(increment.unsigned_abs(), PAGE_4K));

        let new_size = self.heap_size as isize + increment;
        if new_size < 0 || new_size as usize > self.heap_max {
            return None;
        }
        let old_end = self.heap_start + self.heap_size;
        self.heap_size = new_size as usize;
        if increment > 0 {
            // map the new heap
            let entry = VirtualMemoryMapEntry {
                virtual_address: old_end,
                physical_address: None,
                size: increment as usize,
                flags: virtual_memory_mapper::flags::PTE_USER
                    | virtual_memory_mapper::flags::PTE_WRITABLE,
            };
            self.vm.map(&entry);
        } else {
            let new_end = old_end - increment.unsigned_abs();
            // unmap the removed part of the heap
            let entry = VirtualMemoryMapEntry {
                virtual_address: new_end,
                physical_address: None,
                size: increment.unsigned_abs(),
                flags: virtual_memory_mapper::flags::PTE_USER
                    | virtual_memory_mapper::flags::PTE_WRITABLE,
            };
            // `true` because we allocated physical memory using `map`
            self.vm.unmap(&entry, true);
        }

        Some(old_end)
    }

    pub fn get_current_dir(&self) -> &fs::Directory {
        &self.current_dir
    }

    pub fn set_current_dir(&mut self, current_dir: fs::Directory) {
        self.current_dir = current_dir;
    }

    pub fn get_priority(&self) -> PriorityLevel {
        self.priority
    }

    pub fn set_priority(&mut self, priority: PriorityLevel) {
        self.priority = priority;
    }

    pub fn file_path(&self) -> &Path {
        self.file_path.as_path()
    }
}

impl Process {
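    /// Builds the initial user stack: copies each argument string (NUL terminated),
    /// writes a null-terminated array of `argv` pointers, pushes `argv` and `argc`,
    /// and returns the final `rsp` together with `argc` and the address of the
    /// `argv` array.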
    // NOTE: this is very specific to 64-bit x86
    fn prepare_stack(
        vm: &mut VirtualMemoryMapper,
        argv: &[String],
        mut rsp: u64,
        stack_top: u64,
    ) -> (u64, u64, u64) {
        // dealing with vm, so we must disable interrupts
        cpu::cpu().push_cli();
        let old_vm = virtual_memory_mapper::get_current_vm();

        // switch temporarily so we can write to the process's stack
        // SAFETY: this must be called while the current vm and this new vm share the same
        //         kernel regions
        unsafe { vm.switch_to_this() };

        let argc = argv.len();

        let mut argv_ptrs = Vec::with_capacity(argv.len());
        for arg in argv.iter() {
            let arg_ptr = rsp - arg.len() as u64 - 1;
            rsp = arg_ptr;
            // align to 8 bytes
            rsp -= rsp % 8;
            assert!(rsp >= stack_top);

            // convert arg_ptr to slice
            let arg_ptr_slice =
                unsafe { core::slice::from_raw_parts_mut(arg_ptr as *mut u8, arg.len() + 1) };
            // copy the arg
            arg_ptr_slice[..arg.len()].copy_from_slice(arg.as_bytes());
            // put null terminator
            arg_ptr_slice[arg.len()] = 0;

            argv_ptrs.push(arg_ptr);
        }
        // align to 8 bytes
        rsp -= rsp % 8;
        assert!(rsp >= stack_top);
        // add null terminator
        let null_ptr = rsp - 1;
        rsp = null_ptr;
        unsafe { (null_ptr as *mut u8).write(0) };
        argv_ptrs.push(null_ptr);
        // align to 8 bytes
        rsp -= rsp % 8;
        assert!(rsp >= stack_top);

        // write the argv array
        let argv_array_ptr = rsp - (argv_ptrs.len() * 8) as u64;
        rsp = argv_array_ptr;
        let argv_array_ptr_slice =
            unsafe { core::slice::from_raw_parts_mut(argv_array_ptr as *mut u64, argv_ptrs.len()) };
        argv_array_ptr_slice.copy_from_slice(&argv_ptrs);

        // these are not really needed, since on x86_64 we use registers to pass arguments,
        // but we keep them for the future
        // add pointer to argv array
        rsp -= 8;
        assert!(rsp >= stack_top);
        unsafe { (rsp as *mut u64).write(argv_array_ptr) };
        // add argc
        rsp -= 8;
        assert!(rsp >= stack_top);
        unsafe { (rsp as *mut u64).write(argc as u64) };

        // switch back to the old vm
        unsafe { old_vm.switch_to_this() };
        // we can be interrupted again
        cpu::cpu().pop_cli();

        // according to the SysV ABI, the stack is 16-byte aligned just before the call
        // instruction is executed,
        // i.e. we will subtract 8, as this is the alignment the stack will have after the call;
        // we consider the program to start after an imaginary function call from the kernel
        //
        // first, align it to 16 bytes
        rsp = align_down(rsp, 16);
        // second, subtract 8, as the call instruction would have done by pushing the return address
        rsp -= 8;

        (rsp, argc as u64, argv_array_ptr)
    }

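    /// Temporarily switches to the process's address space to write the
    /// `ProcessMetadata` into its dedicated page.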
    fn write_process_meta(
        vm: &mut VirtualMemoryMapper,
        process_meta_addr: usize,
        process_meta: ProcessMetadata,
    ) {
        // dealing with vm, so we must disable interrupts
        cpu::cpu().push_cli();
        let old_vm = virtual_memory_mapper::get_current_vm();

        // switch temporarily so we can write to the process's memory
        // SAFETY: this must be called while the current vm and this new vm share the same
        //         kernel regions
        unsafe { vm.switch_to_this() };

        // write the process meta
        let process_meta_ptr = process_meta_addr as *mut ProcessMetadata;
        unsafe { process_meta_ptr.write(process_meta) };

        // switch back to the old vm
        unsafe { old_vm.switch_to_this() };
        // we can be interrupted again
        cpu::cpu().pop_cli();
    }
}

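// a process must not be dropped while its address space is still active;
// dropping it releases the user part of its memory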
impl Drop for Process {
    fn drop(&mut self) {
        assert!(!self.vm.is_used_by_me());
        self.vm.unmap_process_memory();
    }
}