
Advanced Char Driver Operations


Presentation Transcript


  1. Advanced Char Driver Operations Linux Kernel Programming CIS 4930/COP 5641

  2. Advanced (Manual) Sleeping

  3. Advanced Sleeping • Uses low-level functions to effect a sleep manually • How a process sleeps 1. Allocate and initialize a wait_queue_t structure (the queue element) DEFINE_WAIT(my_wait); • Or wait_queue_t my_wait; init_wait(&my_wait);

  4. Advanced Sleeping 2. Add the entry to the proper wait queue and mark the process as asleep • Change the task state from TASK_RUNNING to TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE • Call void prepare_to_wait(wait_queue_head_t *queue, wait_queue_t *wait, int state);

  5. Advanced Sleeping 3. Give up the processor • Double-check the sleeping condition before going to sleep • A wakeup from another thread might have changed the condition between steps 1 and 2 if (/* sleeping condition */) { schedule(); /* yield the CPU */ }

  6. Advanced Sleeping 4. Return from sleep • Call finish_wait to reset the task state to TASK_RUNNING and remove the entry from the wait queue if it is still queued (e.g., if schedule() was never called) void finish_wait(wait_queue_head_t *queue, wait_queue_t *wait);
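
Putting the four steps together, a minimal sketch of a manual sleep; my_queue, condition, and wait_for_condition are hypothetical placeholders for the driver's own wait queue, wakeup condition, and helper, not anything from scull:

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(my_queue);   /* wait queue head (hypothetical) */
static int condition;                       /* set to nonzero by the wakeup path (hypothetical) */

static int wait_for_condition(void)
{
    while (!condition) {
        DEFINE_WAIT(my_wait);                       /* 1. allocate and initialize the queue entry */

        prepare_to_wait(&my_queue, &my_wait,
                        TASK_INTERRUPTIBLE);        /* 2. enqueue and mark the task as sleeping */
        if (!condition)                             /* 3. re-check the condition, then yield */
            schedule();
        finish_wait(&my_queue, &my_wait);           /* 4. dequeue (if needed) and restore TASK_RUNNING */

        if (signal_pending(current))
            return -ERESTARTSYS;                    /* woken by a signal instead of the condition */
    }
    return 0;
}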

  7. Advanced Sleeping • scullpipe write method /* How much space is free? */ static int spacefree(struct scull_pipe *dev) { if (dev->rp == dev->wp) return dev->buffersize - 1; return ((dev->rp + dev->buffersize - dev->wp) % dev->buffersize) - 1; }

  8. Advanced Sleeping static ssize_t scull_p_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { struct scull_pipe *dev = filp->private_data; int result; if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; /* Wait for space for writing */ result = scull_getwritespace(dev, filp); if (result) return result; /* scull_getwritespace called mutex_unlock(&dev->mutex) */ /* ok, space is there, accept something */ count = min(count, (size_t)spacefree(dev));

  9. Advanced Sleeping if (dev->wp >= dev->rp) count = min(count, (size_t)(dev->end - dev->wp)); else /* the write pointer has wrapped, fill up to rp - 1 */ count = min(count, (size_t)(dev->rp - dev->wp - 1)); if (copy_from_user(dev->wp, buf, count)) { mutex_unlock(&dev->mutex); return -EFAULT; } dev->wp += count; if (dev->wp == dev->end) dev->wp = dev->buffer; /* wrapped */ mutex_unlock(&dev->mutex); wake_up_interruptible(&dev->inq); if (dev->async_queue) kill_fasync(&dev->async_queue, SIGIO, POLL_IN); return count; }

  10. Advanced Sleeping /* Wait for space for writing; caller must hold device mutex. * On error the mutex will be released before returning. */ static int scull_getwritespace(struct scull_pipe *dev, struct file *filp) { while (spacefree(dev) == 0) { /* full */ DEFINE_WAIT(wait); mutex_unlock(&dev->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE); if (spacefree(dev) == 0) schedule(); finish_wait(&dev->outq, &wait); if (signal_pending(current)) return -ERESTARTSYS; if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; } return 0; } Queue: full Task state: RUNNING

  11. Advanced Sleeping /* Wait for space for writing; caller must hold device mutex. * On error the mutex will be released before returning. */ static int scull_getwritespace(struct scull_pipe *dev, struct file *filp) { while (spacefree(dev) == 0) { /* full */ DEFINE_WAIT(wait); mutex_unlock(&dev->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE); if (spacefree(dev) == 0) schedule(); finish_wait(&dev->outq, &wait); if (signal_pending(current)) return -ERESTARTSYS; if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; } return 0; } Queue: full Task state: RUNNING → INTERRUPTIBLE

  12. Advanced Sleeping /* Wait for space for writing; caller must hold device mutex. * On error the mutex will be released before returning. */ static int scull_getwritespace(struct scull_pipe *dev, struct file *filp) { while (spacefree(dev) == 0) { /* full */ DEFINE_WAIT(wait); mutex_unlock(&dev->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE); if (spacefree(dev) == 0) schedule(); finish_wait(&dev->outq, &wait); if (signal_pending(current)) return -ERESTARTSYS; if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; } return 0; } Queue: full Task state: INTERRUPTIBLE /* sleep */

  13. Exclusive Waits • Avoid waking up all processes waiting on a queue • Wakes up only one process • Call void prepare_to_wait_exclusive(wait_queue_head_t *queue, wait_queue_t *wait, int state); • Sets the WQ_FLAG_EXCLUSIVE flag • Adds the queue entry to the end of the wait queue • wake_up stops after waking the first process with the flag set
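
For comparison, the same wait written with an exclusive entry; only the prepare_to_wait call changes (my_queue and condition are the same hypothetical placeholders as in the earlier sketch):

    DEFINE_WAIT(wait);

    prepare_to_wait_exclusive(&my_queue, &wait,
                              TASK_INTERRUPTIBLE);  /* sets WQ_FLAG_EXCLUSIVE, queues at the tail */
    if (!condition)
        schedule();                                 /* a wake_up wakes at most one exclusive waiter */
    finish_wait(&my_queue, &wait);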

  14. The Details of Waking Up /* wakes up all processes waiting on the queue */ void wake_up(wait_queue_head_t *queue); /* wakes up processes that perform an interruptible sleep */ void wake_up_interruptible(wait_queue_head_t *queue); /* wake up to nr exclusive waiters */ void wake_up_nr(wait_queue_head_t *queue, int nr); void wake_up_interruptible_nr(wait_queue_head_t *queue, int nr); /* wake up all exclusive waiters */ void wake_up_all(wait_queue_head_t *queue); void wake_up_interruptible_all(wait_queue_head_t *queue); /* do not lose the CPU during this call */ void wake_up_interruptible_sync(wait_queue_head_t *queue);

  15. poll and select (and epoll)

  16. poll and select (and epoll) • Nonblocking I/O often involves the use of the poll, select, and epoll system calls • Allow a process to determine whether it can read or write one or more open files without blocking • Can block a process until any of a set of file descriptors becomes available for reading or writing • select introduced in BSD Unix • poll introduced in System V • epoll added in 2.5.45 • Scales better with large numbers of file descriptors
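
From user space the three interfaces serve the same purpose; a minimal poll(2) sketch on two already-open descriptors (the function name and the 5-second timeout are illustrative):

#include <poll.h>
#include <stdio.h>

/* Wait until either descriptor becomes readable, or the timeout expires. */
int wait_readable(int fd1, int fd2)
{
    struct pollfd fds[2] = {
        { .fd = fd1, .events = POLLIN },
        { .fd = fd2, .events = POLLIN },
    };
    int n = poll(fds, 2, 5000);         /* block for up to 5 seconds */

    if (n < 0) {
        perror("poll");
    } else if (n == 0) {
        printf("timeout\n");
    } else {
        if (fds[0].revents & POLLIN)
            printf("fd1 is readable\n");
        if (fds[1].revents & POLLIN)
            printf("fd2 is readable\n");
    }
    return n;
}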

  17. poll and select • All three calls supported through the poll method unsigned int (*poll) (struct file *filp, poll_table *wait); 1. Call poll_wait on one or more wait queues that could indicate a change in the poll status • If no file descriptors are available, wait 2. Return a bit mask describing the operations that could be immediately performed without blocking

  18. poll and select • poll_table defined in <linux/poll.h> • To add a wait queue into the poll_table, call void poll_wait(struct file *, wait_queue_head_t *, poll_table *); • Bit mask flags defined in <linux/poll.h> • POLLIN • Set if the device can be read without blocking

  19. poll and select • POLLOUT • Set if the device can be written without blocking • POLLRDNORM • Set if “normal” data is available for reading • A readable device returns (POLLIN | POLLRDNORM) • POLLWRNORM • Same meaning as POLLOUT • A writable device returns (POLLOUT | POLLWRNORM) • POLLPRI • High-priority data can be read without blocking

  20. poll and select • POLLHUP • Set when a process reading the device reaches end-of-file • POLLERR • An error condition has occurred on the device • POLLRDBAND • Out-of-band data is available for reading • Associated with sockets • POLLWRBAND • Data with nonzero priority can be written to the device • Associated with sockets

  21. poll and select • Example static unsigned int scull_p_poll(struct file *filp, poll_table *wait) { struct scull_pipe *dev = filp->private_data; unsigned int mask = 0; mutex_lock(&dev->mutex); poll_wait(filp, &dev->inq, wait); poll_wait(filp, &dev->outq, wait); if (dev->rp != dev->wp) /* circular buffer not empty */ mask |= POLLIN | POLLRDNORM; /* readable */ if (spacefree(dev)) /* circular buffer not full */ mask |= POLLOUT | POLLWRNORM; /* writable */ mutex_unlock(&dev->mutex); return mask; }

  22. poll and select • No end-of-file support • scullpipe does not implement this • If it did… • The reader could see an end-of-file when all writers close the file • Check dev->nwriters in read and poll • Problem when a reader opens the scullpipe before any writer • Would need blocking within open

  23. Interaction with read and write • Reading from the device • If there is data in the input buffer, return at least one byte • poll returns POLLIN | POLLRDNORM • If no data is available • If O_NONBLOCK is set, return -EAGAIN • poll must report the device unreadable until at least one byte arrives • At end-of-file, read returns 0 and poll returns POLLHUP
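
A read method that follows these rules looks roughly like the sketch below; it is modeled on scullpipe's scull_p_read and reuses the field names from the write code shown earlier, but the details are illustrative rather than the verbatim source:

static ssize_t scull_p_read(struct file *filp, char __user *buf,
                            size_t count, loff_t *f_pos)
{
    struct scull_pipe *dev = filp->private_data;

    if (mutex_lock_interruptible(&dev->mutex))
        return -ERESTARTSYS;

    while (dev->rp == dev->wp) {                    /* nothing to read */
        mutex_unlock(&dev->mutex);
        if (filp->f_flags & O_NONBLOCK)
            return -EAGAIN;                         /* nonblocking read: report "try again" */
        if (wait_event_interruptible(dev->inq, (dev->rp != dev->wp)))
            return -ERESTARTSYS;
        if (mutex_lock_interruptible(&dev->mutex))
            return -ERESTARTSYS;
    }

    /* data is there; return at least one byte, up to the wrap point */
    if (dev->wp > dev->rp)
        count = min(count, (size_t)(dev->wp - dev->rp));
    else
        count = min(count, (size_t)(dev->end - dev->rp));
    if (copy_to_user(buf, dev->rp, count)) {
        mutex_unlock(&dev->mutex);
        return -EFAULT;
    }
    dev->rp += count;
    if (dev->rp == dev->end)
        dev->rp = dev->buffer;                      /* wrapped */
    mutex_unlock(&dev->mutex);

    wake_up_interruptible(&dev->outq);              /* space was freed: wake any blocked writers */
    return count;
}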

  24. Interaction with read and write • Writing to the device • If there is space in the output buffer, accept at least one byte • poll reports that the device is writable by returning POLLOUT | POLLWRNORM • If the output buffer is full, write blocks • If O_NONBLOCK is set, write returns -EAGAIN • poll reports that the file is not writable • If the device is full, write returns -ENOSPC

  25. Interaction with read and write • In write, never wait for data transmission before returning • Otherwise, select may block • To make sure the output buffer is actually transmitted, use the fsync call

  26. Interaction with read and write • To flush pending output, call fsync int (*fsync) (struct file *file, loff_t, loff_t, int datasync); • Should return only when the device has been completely flushed • datasync: • Used by file systems, ignored by drivers
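
For a device whose data lives only in memory there is nothing to flush, so the method can be a stub; a hypothetical sketch (scullpipe itself does not define an fsync method):

static int scull_p_fsync(struct file *filp, loff_t start, loff_t end,
                         int datasync)
{
    /* No hardware queue or cache to drain; report the device as already
     * flushed.  A real device would wait for pending output here. */
    return 0;
}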

  27. The Underlying Data Structure

  28. The Underlying Data Structure • When the poll call completes, the poll_table is deallocated and all its wait queue entries are removed • epoll reduces the overhead of setting up and tearing down this data structure on every I/O call
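
For comparison with the poll(2) sketch earlier, a minimal user-space epoll sketch; the interest set is registered once with epoll_ctl and then reused across epoll_wait calls (the function name and single-descriptor setup are illustrative):

#include <sys/epoll.h>
#include <unistd.h>

/* Register one descriptor for readability, then wait for it once. */
int epoll_wait_readable(int fd)
{
    struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
    struct epoll_event ready;
    int n, epfd = epoll_create1(0);

    if (epfd < 0)
        return -1;
    if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
        close(epfd);
        return -1;
    }
    n = epoll_wait(epfd, &ready, 1, -1);    /* the registration persists across calls */
    close(epfd);
    return n;                               /* 1 if fd became readable, -1 on error */
}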

  29. Asynchronous Notification • Polling • Inefficient for rare events • A solution: asynchronous notification • Application receives a signal whenever data becomes available • Two steps • Specify a process as the owner of the file (so that the kernel knows whom to notify) • Set the FASYNC flag in the device via fcntl command

  30. Asynchronous Notification • Example (user space) /* install a signal handler */ signal(SIGIO, &input_handler); /* make the current process the owner of stdin */ fcntl(STDIN_FILENO, F_SETOWN, getpid()); /* obtain the current file control flags */ oflags = fcntl(STDIN_FILENO, F_GETFL); /* set the asynchronous flag */ fcntl(STDIN_FILENO, F_SETFL, oflags | FASYNC);
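
The input_handler installed above is an ordinary user-space signal handler; a minimal sketch (the flag-based design is illustrative):

#include <signal.h>

static volatile sig_atomic_t data_ready;    /* set by the handler, checked by the main loop */

/* Invoked when the kernel delivers SIGIO for a descriptor we own. */
static void input_handler(int signum)
{
    data_ready = 1;                          /* defer the actual read to the main loop */
}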

  31. Asynchronous Notification • Some catches • Not all devices support asynchronous notification • Usually available for sockets and ttys • Need to know which input file to process • Still need to use poll or select

  32. The Driver’s Point of View 1. When F_SETOWN is invoked, a value is assigned to filp->f_owner 2. When F_SETFL is executed to change the status of FASYNC • The driver’s fasync method is called static int scull_p_fasync(int fd, struct file *filp, int mode) { struct scull_pipe *dev = filp->private_data; return fasync_helper(fd, filp, mode, &dev->async_queue); }

  33. The Driver’s Point of View • fasync_helper adds or removes processes from the asynchronous notification list int fasync_helper(int fd, struct file *filp, int mode, struct fasync_struct **fa); 3. When data arrives, send a SIGIO signal to all processes registered for asynchronous notification • Near the end of write, notify blocked readers if (dev->async_queue) kill_fasync(&dev->async_queue, SIGIO, POLL_IN); • Similarly for read, as needed

  34. The Driver’s Point of View 4. When the file is closed, remove the file from the list of asynchronous readers in the release method scull_p_fasync(-1, filp, 0);
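
Putting it together, the fasync method is registered in the driver's file_operations and invoked once more from release; an abbreviated sketch (the fops initializer lists only the methods discussed here):

static int scull_p_release(struct inode *inode, struct file *filp)
{
    scull_p_fasync(-1, filp, 0);    /* remove this file from the asynchronous list */
    /* ... release reader/writer bookkeeping, free buffers, etc. ... */
    return 0;
}

static const struct file_operations scull_pipe_fops = {
    .owner   = THIS_MODULE,
    .read    = scull_p_read,
    .write   = scull_p_write,
    .poll    = scull_p_poll,
    .fasync  = scull_p_fasync,      /* called when FASYNC is toggled via fcntl(F_SETFL) */
    .release = scull_p_release,
    /* ... open, llseek, etc. ... */
};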

  35. Access Control

  36. Access Control on a Device File • Prevents unauthorized users from using the device • Sometimes permits only one authorized user to open the device at a time

  37. Single-Open Devices • Example: scullsingle static atomic_t scull_s_available = ATOMIC_INIT(1); static int scull_s_open(struct inode *inode, struct file *filp) { struct scull_dev *dev = &scull_s_device; if (!atomic_dec_and_test(&scull_s_available)) { atomic_inc(&scull_s_available); return -EBUSY; /* already open */ } /* then, everything else is the same as before */ if ((filp->f_flags & O_ACCMODE) == O_WRONLY) scull_trim(dev); filp->private_data = dev; return 0; /* success */ } atomic_dec_and_test returns true if the decremented value is 0

  38. Single-Open Devices • In the release call, marks the device idle static int scull_s_release(struct inode *inode, struct file *filp) { atomic_inc(&scull_s_available); /* release the device */ return 0; }

  39. Restricting Access to a Single User (with multiple processes) at a Time • Example: sculluid • Includes the following in the open call spin_lock(&scull_u_lock); if (scull_u_count && /* someone is using the device */ (scull_u_owner != current->uid) && /* not the same user */ (scull_u_owner != current->euid) && /* not the same effective uid (for su) */ !capable(CAP_DAC_OVERRIDE)) { /* not root override */ spin_unlock(&scull_u_lock); return -EBUSY; /* -EPERM would confuse the user */ } if (scull_u_count == 0) scull_u_owner = current->uid; scull_u_count++; spin_unlock(&scull_u_lock);

  40. Restricting Access to a Single User (with Multiple Processes) at a Time • Includes the following in the release call static int scull_u_release(struct inode *inode, struct file *filp) { spin_lock(&scull_u_lock); scull_u_count--; /* nothing else */ spin_unlock(&scull_u_lock); return 0; }

  41. Blocking open as an Alternative to EBUSY (scullwuid) • A user might prefer to wait over getting errors • E.g., data communication channel spin_lock(&scull_w_lock); while (!scull_w_available()) { spin_unlock(&scull_w_lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(scull_w_wait, scull_w_available())) return -ERESTARTSYS; /* tell the fs layer to handle it */ spin_lock(&scull_w_lock); } if (scull_w_count == 0) scull_w_owner = current->uid; scull_w_count++; spin_unlock(&scull_w_lock);
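
scull_w_available() is used above but not shown on the slide; a sketch assuming the same globals and the old-style current->uid fields used throughout these slides:

/* The device is available if nobody holds it, the caller already owns it,
 * or the caller is allowed to override access checks. */
static int scull_w_available(void)
{
    return scull_w_count == 0 ||
           scull_w_owner == current->uid ||
           scull_w_owner == current->euid ||
           capable(CAP_DAC_OVERRIDE);
}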

  42. Blocking open as an Alternative to EBUSY (scullwuid) • The release method wakes pending processes static int scull_w_release(struct inode *inode, struct file *filp) { int temp; spin_lock(&scull_w_lock); scull_w_count--; temp = scull_w_count; spin_unlock(&scull_w_lock); if (temp == 0) wake_up_interruptible_sync(&scull_w_wait); return 0; }

  43. Blocking open as an Alternative to EBUSY • Might not be the right semantics for interactive users • Blocking on cp vs. getting a return value -EBUSY or -EPERM • Incompatible policies for the same device • One solution: one device node per policy

  44. Cloning the Device on open • Allows the creation of private, virtual devices • E.g., one virtual scull device per controlling tty, keyed by the tty's device number • Example: scullpriv

  45. Cloning the Device on open static int scull_c_open(struct inode *inode, struct file *filp) { struct scull_dev *dev; dev_t key; if (!current->signal->tty) { PDEBUG("Process \"%s\" has no ctl tty\n", current->comm); return -EINVAL; } key = tty_devnum(current->signal->tty); spin_lock(&scull_c_lock); dev = scull_c_lookfor_device(key); spin_unlock(&scull_c_lock); if (!dev) return -ENOMEM; .../* then, everything else is the same as before */ }

  46. Cloning the Device on open /* The clone-specific data structure includes a key field */ struct scull_listitem { struct scull_dev device; dev_t key; struct list_head list; }; /* The list of devices, and a lock to protect it */ static LIST_HEAD(scull_c_list); static DEFINE_SPINLOCK(scull_c_lock);

  47. Cloning the Device on open /* Look for a device or create one if missing */ static struct scull_dev *scull_c_lookfor_device(dev_t key) { struct scull_listitem *lptr; list_for_each_entry(lptr, &scull_c_list, list) { if (lptr->key == key) return &(lptr->device); } /* not found */ lptr = kzalloc(sizeof(struct scull_listitem), GFP_KERNEL); if (!lptr) return NULL;

  48. Cloning the Device on open /* initialize the device */ lptr->key = key; scull_trim(&(lptr->device)); /* initialize it */ mutex_init(&(lptr->device.mutex)); /* place it in the list */ list_add(&lptr->list, &scull_c_list); return &(lptr->device);

  49. What’s going on? • struct list_head { struct list_head *next; struct list_head *prev; }; • Each scull_listitem embeds a struct list_head field named list, alongside struct scull_dev device and dev_t key • scull_c_list is the head that links every scull_listitem on the list
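
The point of the diagram: each node on scull_c_list is the list field embedded inside a scull_listitem, and list_entry (a wrapper around container_of) converts that embedded pointer back into a pointer to the containing structure. The list_for_each_entry loop in the lookup function is equivalent to this hand-written sketch:

    struct list_head *pos;
    struct scull_listitem *lptr;

    list_for_each(pos, &scull_c_list) {
        lptr = list_entry(pos, struct scull_listitem, list);   /* container_of(pos, ...) */
        if (lptr->key == key)
            return &(lptr->device);
    }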

  50. (put|get)_user() • copy_to_user and copy_from_user seen previously • Data transfer functions optimized for the most commonly used data sizes (1, 2, 4, and 8 bytes) • If the size mismatches • Cryptic compiler error message: • Conversion to non-scalar type requested • #include <linux/uaccess.h> • put_user(datum, ptr) • Writes to a user-space address • Calls access_ok() • Returns 0 on success, -EFAULT on error
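
A sketch showing both helpers inside a hypothetical ioctl handler; the command numbers, the demo_ioctl name, and the device_value variable are made up for illustration:

#include <linux/uaccess.h>
#include <linux/ioctl.h>
#include <linux/fs.h>

#define DEMO_GET_VALUE _IOR('d', 1, int)    /* hypothetical command numbers */
#define DEMO_SET_VALUE _IOW('d', 2, int)
static int device_value;                    /* hypothetical per-device datum */

static long demo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    int __user *uptr = (int __user *)arg;
    int value;

    switch (cmd) {
    case DEMO_GET_VALUE:
        if (put_user(device_value, uptr))   /* writes one int to user space */
            return -EFAULT;
        return 0;
    case DEMO_SET_VALUE:
        if (get_user(value, uptr))          /* reads one int from user space */
            return -EFAULT;
        device_value = value;
        return 0;
    default:
        return -ENOTTY;
    }
}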
