fs/fuse/dev.c
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/freezer.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}
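
/* Reset a (possibly recycled) request to a clean state holding one reference */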
static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->intr_entry);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
	if (req)
		fuse_request_init(req);
	return req;
}

void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}
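
/*
 * Block all signals except SIGKILL around a wait, so that only a fatal
 * signal can interrupt it; restore_sigs() restores the old mask.
 */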
static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
	req->in.h.pid = current->pid;
}
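
/*
 * Allocate a request for sending to userspace.  A slot in num_waiting is
 * reserved up front, and the caller sleeps while the connection is
 * blocked (too many background requests); only SIGKILL can interrupt
 * the wait.
 */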
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req;
	sigset_t oldset;
	int intr;
	int err;

	atomic_inc(&fc->num_waiting);
	block_sigs(&oldset);
	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
	restore_sigs(&oldset);
	err = -EINTR;
	if (intr)
		goto out;

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	req = fuse_request_alloc();
	err = -ENOMEM;
	if (!req)
		goto out;

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;

 out:
	atomic_dec(&fc->num_waiting);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_req *req = NULL;
	struct fuse_file *ff = file->private_data;

	do {
		wait_event(fc->reserved_req_waitq, ff->reserved_req);
		spin_lock(&fc->lock);
		if (ff->reserved_req) {
			req = ff->reserved_req;
			ff->reserved_req = NULL;
			req->stolen_file = get_file(file);
		}
		spin_unlock(&fc->lock);
	} while (!req);

	return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct file *file = req->stolen_file;
	struct fuse_file *ff = file->private_data;

	spin_lock(&fc->lock);
	fuse_request_init(req);
	BUG_ON(ff->reserved_req);
	ff->reserved_req = req;
	wake_up_all(&fc->reserved_req_waitq);
	spin_unlock(&fc->lock);
	fput(file);
}

/*
 * Gets a request for a file operation, always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
	struct fuse_req *req;

	atomic_inc(&fc->num_waiting);
	wait_event(fc->blocked_waitq, !fc->blocked);
	req = fuse_request_alloc();
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		if (req->waiting)
			atomic_dec(&fc->num_waiting);

		if (req->stolen_file)
			put_reserved_req(fc, req);
		else
			fuse_request_free(req);
	}
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;

	return fc->reqctr;
}
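
/*
 * Add a request to the pending list and wake up the device reader.
 * Called with fc->lock held.
 */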
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	if (!req->waiting) {
		req->waiting = 1;
		atomic_inc(&fc->num_waiting);
	}
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
		       u64 nodeid, u64 nlookup)
{
	forget->forget_one.nodeid = nodeid;
	forget->forget_one.nlookup = nlookup;

	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->forget_list_tail->next = forget;
		fc->forget_list_tail = forget;
		wake_up(&fc->waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	} else {
		kfree(forget);
	}
	spin_unlock(&fc->lock);
}
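
/*
 * Move queued background requests to the pending list while fewer than
 * max_background of them are active.  Called with fc->lock held.
 */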
static void flush_bg_queue(struct fuse_conn *fc)
{
	while (fc->active_background < fc->max_background &&
	       !list_empty(&fc->bg_queue)) {
		struct fuse_req *req;

		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
		list_del(&req->list);
		fc->active_background++;
		req->in.h.unique = fuse_get_unique(fc);
		queue_request(fc, req);
	}
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
{
	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
	req->end = NULL;
	list_del(&req->list);
	list_del(&req->intr_entry);
	req->state = FUSE_REQ_FINISHED;
	if (req->background) {
		if (fc->num_background == fc->max_background) {
			fc->blocked = 0;
			wake_up_all(&fc->blocked_waitq);
		}
		if (fc->num_background == fc->congestion_threshold &&
		    fc->connected && fc->bdi_initialized) {
			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
		}
		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
	}
	spin_unlock(&fc->lock);
	wake_up(&req->waitq);
	if (end)
		end(fc, req);
	fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
				      struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
	if (signal_pending(current))
		return;

	spin_unlock(&fc->lock);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
	list_add_tail(&req->intr_entry, &fc->interrupts);
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		wait_answer_interruptible(fc, req);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		req->interrupted = 1;
		if (req->state == FUSE_REQ_SENT)
			queue_interrupt(fc, req);
	}

	if (!req->force) {
		sigset_t oldset;

		/* Only fatal signals may interrupt this */
		block_sigs(&oldset);
		wait_answer_interruptible(fc, req);
		restore_sigs(&oldset);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		/* Request is not yet in userspace, bail out */
		if (req->state == FUSE_REQ_PENDING) {
			list_del(&req->list);
			__fuse_put_request(req);
			req->out.h.error = -EINTR;
			return;
		}
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	spin_unlock(&fc->lock);

	while (req->state != FUSE_REQ_FINISHED)
		wait_event_freezable(req->waitq,
				     req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);

	if (!req->aborted)
		return;

 aborted:
	BUG_ON(req->state != FUSE_REQ_FINISHED);
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
}

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		req->in.h.unique = fuse_get_unique(fc);
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_request_send);
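
/*
 * Queue a background request.  The connection is marked blocked when
 * max_background is reached, and the bdi is marked congested at the
 * congestion threshold.  Called with fc->lock held.
 */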
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
					    struct fuse_req *req)
{
	req->background = 1;
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	if (fc->num_background == fc->congestion_threshold &&
	    fc->bdi_initialized) {
		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
	}
	list_add_tail(&req->list, &fc->bg_queue);
	flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fuse_request_send_nowait_locked(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

static int fuse_request_send_notify_reply(struct fuse_conn *fc,
					  struct fuse_req *req, u64 unique)
{
	int err = -ENODEV;

	req->isreply = 0;
	req->in.h.unique = unique;
	spin_lock(&fc->lock);
	if (fc->connected) {
		queue_request(fc, req);
		err = 0;
	}
	spin_unlock(&fc->lock);

	return err;
}

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
					 struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait_locked(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->aborted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}

/*
 * Unlock request.  If it was aborted during being locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->aborted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}
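
/*
 * State for copying request/reply data between the kernel and userspace
 * one page at a time, from either an iovec or a set of pipe buffers.
 */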
struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	struct pipe_buffer *pipebufs;
	struct pipe_buffer *currbuf;
	struct pipe_inode_info *pipe;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
	unsigned move_pages:1;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;

		if (!cs->write) {
			buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
		} else {
			kunmap(buf->page);
			buf->len = PAGE_SIZE - cs->len;
		}
		cs->currbuf = NULL;
		cs->mapaddr = NULL;
	} else if (cs->mapaddr) {
		kunmap(cs->pg);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}

/*
 * Get another pagefull of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		if (!cs->write) {
			err = buf->ops->confirm(cs->pipe, buf);
			if (err)
				return err;

			BUG_ON(!cs->nr_segs);
			cs->currbuf = buf;
			cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
			cs->len = buf->len;
			cs->buf = cs->mapaddr + buf->offset;
			cs->pipebufs++;
			cs->nr_segs--;
		} else {
			struct page *page;

			if (cs->nr_segs == cs->pipe->buffers)
				return -EIO;

			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				return -ENOMEM;

			buf->page = page;
			buf->offset = 0;
			buf->len = 0;

			cs->currbuf = buf;
			cs->mapaddr = kmap(page);
			cs->buf = cs->mapaddr;
			cs->len = PAGE_SIZE;
			cs->pipebufs++;
			cs->nr_segs++;
		}
	} else {
		if (!cs->seglen) {
			BUG_ON(!cs->nr_segs);
			cs->seglen = cs->iov[0].iov_len;
			cs->addr = (unsigned long) cs->iov[0].iov_base;
			cs->iov++;
			cs->nr_segs--;
		}
		err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
		if (err < 0)
			return err;
		BUG_ON(err != 1);
		offset = cs->addr % PAGE_SIZE;
		cs->mapaddr = kmap(cs->pg);
		cs->buf = cs->mapaddr + offset;
		cs->len = min(PAGE_SIZE - offset, cs->seglen);
		cs->seglen -= cs->len;
		cs->addr += cs->len;
	}

	return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}

static int fuse_check_page(struct page *page)
{
	if (page_mapcount(page) ||
	    page->mapping != NULL ||
	    page_count(page) != 1 ||
	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
	     ~(1 << PG_locked |
	       1 << PG_referenced |
	       1 << PG_uptodate |
	       1 << PG_lru |
	       1 << PG_active |
	       1 << PG_reclaim))) {
		printk(KERN_WARNING "fuse: trying to steal weird page\n");
		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
		return 1;
	}
	return 0;
}
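
/*
 * Try to steal the page backing a pipe buffer and install it in the page
 * cache in place of *pagep, avoiding a copy.  Returns 0 on success, 1 if
 * the caller should fall back to an ordinary copy, or a negative error.
 */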
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
	int err;
	struct page *oldpage = *pagep;
	struct page *newpage;
	struct pipe_buffer *buf = cs->pipebufs;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);

	err = buf->ops->confirm(cs->pipe, buf);
	if (err)
		return err;

	BUG_ON(!cs->nr_segs);
	cs->currbuf = buf;
	cs->len = buf->len;
	cs->pipebufs++;
	cs->nr_segs--;

	if (cs->len != PAGE_SIZE)
		goto out_fallback;

	if (buf->ops->steal(cs->pipe, buf) != 0)
		goto out_fallback;

	newpage = buf->page;

	if (WARN_ON(!PageUptodate(newpage)))
		return -EIO;

	ClearPageMappedToDisk(newpage);

	if (fuse_check_page(newpage) != 0)
		goto out_fallback_unlock;

	/*
	 * This is a new and locked page, it shouldn't be mapped or
	 * have any special flags on it
	 */
	if (WARN_ON(page_mapped(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(page_has_private(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(PageMlocked(oldpage)))
		goto out_fallback_unlock;

	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
	if (err) {
		unlock_page(newpage);
		return err;
	}

	page_cache_get(newpage);

	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
		lru_cache_add_file(newpage);

	err = 0;
	spin_lock(&cs->fc->lock);
	if (cs->req->aborted)
		err = -ENOENT;
	else
		*pagep = newpage;
	spin_unlock(&cs->fc->lock);

	if (err) {
		unlock_page(newpage);
		page_cache_release(newpage);
		return err;
	}

	unlock_page(oldpage);
	page_cache_release(oldpage);
	cs->len = 0;

	return 0;

out_fallback_unlock:
	unlock_page(newpage);
out_fallback:
	cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
	cs->buf = cs->mapaddr + buf->offset;

	err = lock_request(cs->fc, cs->req);
	if (err)
		return err;

	return 1;
}
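
/*
 * Instead of copying a request page, hand a reference to it directly to
 * the pipe buffer (splice read path).
 */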
static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
			 unsigned offset, unsigned count)
{
	struct pipe_buffer *buf;

	if (cs->nr_segs == cs->pipe->buffers)
		return -EIO;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);

	buf = cs->pipebufs;
	page_cache_get(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = count;

	cs->pipebufs++;
	cs->nr_segs++;
	cs->len = 0;

	return 0;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
			  unsigned offset, unsigned count, int zeroing)
{
	int err;
	struct page *page = *pagep;

	if (page && zeroing && count < PAGE_SIZE)
		clear_highpage(page);

	while (count) {
		if (cs->write && cs->pipebufs && page) {
			return fuse_ref_page(cs, page, offset, count);
		} else if (!cs->len) {
			if (cs->move_pages && page &&
			    offset == 0 && count == PAGE_SIZE) {
				err = fuse_try_move_page(cs, pagep);
				if (err <= 0)
					return err;
			} else {
				err = fuse_copy_fill(cs);
				if (err)
					return err;
			}
		}
		if (page) {
			void *mapaddr = kmap_atomic(page);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		int err;

		err = fuse_copy_page(cs, &req->pages[i], offset, count,
				     zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++)  {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}

static int forget_pending(struct fuse_conn *fc)
{
	return fc->forget_list_head.next != NULL;
}

static int request_pending(struct fuse_conn *fc)
{
	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
		forget_pending(fc);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && !request_pending(fc)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
			       size_t nbytes, struct fuse_req *req)
__releases(fc->lock)
{
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	req->intr_unique = fuse_get_unique(fc);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = req->intr_unique;
	arg.unique = req->in.h.unique;

	spin_unlock(&fc->lock);
	if (nbytes < reqsize)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	return err ? err : reqsize;
}
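
/*
 * Detach up to 'max' queued forgets from the list and return them; the
 * number actually dequeued is stored in *countp if non-NULL.  Called
 * with fc->lock held.
 */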
static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
					       unsigned max,
					       unsigned *countp)
{
	struct fuse_forget_link *head = fc->forget_list_head.next;
	struct fuse_forget_link **newhead = &head;
	unsigned count;

	for (count = 0; *newhead != NULL && count < max; count++)
		newhead = &(*newhead)->next;

	fc->forget_list_head.next = *newhead;
	*newhead = NULL;
	if (fc->forget_list_head.next == NULL)
		fc->forget_list_tail = &fc->forget_list_head;

	if (countp != NULL)
		*countp = count;

	return head;
}

static int fuse_read_single_forget(struct fuse_conn *fc,
				   struct fuse_copy_state *cs,
				   size_t nbytes)
__releases(fc->lock)
{
	int err;
	struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
	struct fuse_forget_in arg = {
		.nlookup = forget->forget_one.nlookup,
	};
	struct fuse_in_header ih = {
		.opcode = FUSE_FORGET,
		.nodeid = forget->forget_one.nodeid,
		.unique = fuse_get_unique(fc),
		.len = sizeof(ih) + sizeof(arg),
	};

	spin_unlock(&fc->lock);
	kfree(forget);
	if (nbytes < ih.len)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

static int fuse_read_batch_forget(struct fuse_conn *fc,
				   struct fuse_copy_state *cs, size_t nbytes)
__releases(fc->lock)
{
	int err;
	unsigned max_forgets;
	unsigned count;
	struct fuse_forget_link *head;
	struct fuse_batch_forget_in arg = { .count = 0 };
	struct fuse_in_header ih = {
		.opcode = FUSE_BATCH_FORGET,
		.unique = fuse_get_unique(fc),
		.len = sizeof(ih) + sizeof(arg),
	};

	if (nbytes < ih.len) {
		spin_unlock(&fc->lock);
		return -EINVAL;
	}

	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
	head = dequeue_forget(fc, max_forgets, &count);
	spin_unlock(&fc->lock);

	arg.count = count;
	ih.len += count * sizeof(struct fuse_forget_one);
	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));

	while (head) {
		struct fuse_forget_link *forget = head;

		if (!err) {
			err = fuse_copy_one(cs, &forget->forget_one,
					    sizeof(forget->forget_one));
		}
		head = forget->next;
		kfree(forget);
	}

	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
			    size_t nbytes)
__releases(fc->lock)
{
	if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
		return fuse_read_single_forget(fc, cs, nbytes);
	else
		return fuse_read_batch_forget(fc, cs, nbytes);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to userspace buffer.  If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
 * request_end().  Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
				struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	unsigned reqsize;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    !request_pending(fc))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (!request_pending(fc))
		goto err_unlock;

	if (!list_empty(&fc->interrupts)) {
		req = list_entry(fc->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fc, cs, nbytes, req);
	}

	if (forget_pending(fc)) {
		if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
			return fuse_read_forget(fc, cs, nbytes);

		if (fc->forget_batch <= -8)
			fc->forget_batch = 16;
	}

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (nbytes < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	cs->req = req;
	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (req->aborted) {
		request_end(fc, req);
		return -ENODEV;
	}
	if (err) {
		req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}

static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	struct fuse_copy_state cs;
	struct file *file = iocb->ki_filp;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 1, iov, nr_segs);

	return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
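
/*
 * Pages spliced from the fuse device must not be stolen by the pipe
 * reader, so the steal operation always fails.
 */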
static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
				   struct pipe_buffer *buf)
{
	return 1;
}

static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
	.steal = fuse_dev_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
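
/*
 * splice(2) read from the fuse device: the request is read into a
 * temporary array of pipe buffers, which are then linked into the pipe,
 * so request pages reach userspace without an extra copy.
 */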
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
				    struct pipe_inode_info *pipe,
				    size_t len, unsigned int flags)
{
	int ret;
	int page_nr = 0;
	int do_wakeup = 0;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(in);
	if (!fc)
		return -EPERM;

	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	fuse_copy_init(&cs, fc, 1, NULL, 0);
	cs.pipebufs = bufs;
	cs.pipe = pipe;
	ret = fuse_dev_do_read(fc, in, &cs, len);
	if (ret < 0)
		goto out;

	ret = 0;
	pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		if (!ret)
			ret = -EPIPE;
		goto out_unlock;
	}

	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
		ret = -EIO;
		goto out_unlock;
	}

	while (page_nr < cs.nr_segs) {
		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		struct pipe_buffer *buf = pipe->bufs + newbuf;

		buf->page = bufs[page_nr].page;
		buf->offset = bufs[page_nr].offset;
		buf->len = bufs[page_nr].len;
		buf->ops = &fuse_dev_pipe_buf_ops;

		pipe->nrbufs++;
		page_nr++;
		ret += buf->len;

		if (pipe->inode)
			do_wakeup = 1;
	}

out_unlock:
	pipe_unlock(pipe);

	if (do_wakeup) {
		smp_mb();
		if (waitqueue_active(&pipe->wait))
			wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}

out:
	for (; page_nr < cs.nr_segs; page_nr++)
		page_cache_release(bufs[page_nr].page);

	kfree(bufs);
	return ret;
}
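
/* FUSE_NOTIFY_POLL: wake up processes polling a file */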
static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
			    struct fuse_copy_state *cs)
{
	struct fuse_notify_poll_wakeup_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	fuse_copy_finish(cs);
	return fuse_notify_poll_wakeup(fc, &outarg);

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_inode_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;
	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
					       outarg.off, outarg.len);
	}
	up_read(&fc->killsb);
	return err;

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_entry_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
			      struct fuse_copy_state *cs)
{
	struct fuse_notify_delete_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
					       outarg.child, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}
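
/*
 * FUSE_NOTIFY_STORE: userspace pushes data directly into the page cache
 * of the given inode, extending the file size if necessary.
 */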
static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
			     struct fuse_copy_state *cs)
{
	struct fuse_notify_store_out outarg;
	struct inode *inode;
	struct address_space *mapping;
	u64 nodeid;
	int err;
	pgoff_t index;
	unsigned int offset;
	unsigned int num;
	loff_t file_size;
	loff_t end;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto out_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto out_finish;

	err = -EINVAL;
	if (size - sizeof(outarg) != outarg.size)
		goto out_finish;

	nodeid = outarg.nodeid;

	down_read(&fc->killsb);

	err = -ENOENT;
	if (!fc->sb)
		goto out_up_killsb;

	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
	if (!inode)
		goto out_up_killsb;

	mapping = inode->i_mapping;
	index = outarg.offset >> PAGE_CACHE_SHIFT;
	offset = outarg.offset & ~PAGE_CACHE_MASK;
	file_size = i_size_read(inode);
	end = outarg.offset + outarg.size;
	if (end > file_size) {
		file_size = end;
		fuse_write_update_size(inode, file_size);
	}

	num = outarg.size;
	while (num) {
		struct page *page;
		unsigned int this_num;

		err = -ENOMEM;
		page = find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
		if (!page)
			goto out_iput;

		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
		err = fuse_copy_page(cs, &page, offset, this_num, 0);
		if (!err && offset == 0 && (num != 0 || file_size == end))
			SetPageUptodate(page);
		unlock_page(page);
		page_cache_release(page);

		if (err)
			goto out_iput;

		num -= this_num;
		offset = 0;
		index++;
	}

	err = 0;

out_iput:
	iput(inode);
out_up_killsb:
	up_read(&fc->killsb);
out_finish:
	fuse_copy_finish(cs);
	return err;
}

static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
	release_pages(req->pages, req->num_pages, 0);
}
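
/*
 * Build and send a FUSE_NOTIFY_REPLY request carrying the cached pages
 * that userspace asked for with FUSE_NOTIFY_RETRIEVE.
 */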
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
			 struct fuse_notify_retrieve_out *outarg)
{
	int err;
	struct address_space *mapping = inode->i_mapping;
	struct fuse_req *req;
	pgoff_t index;
	loff_t file_size;
	unsigned int num;
	unsigned int offset;
	size_t total_len = 0;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	offset = outarg->offset & ~PAGE_CACHE_MASK;

	req->in.h.opcode = FUSE_NOTIFY_REPLY;
	req->in.h.nodeid = outarg->nodeid;
	req->in.numargs = 2;
	req->in.argpages = 1;
	req->page_offset = offset;
	req->end = fuse_retrieve_end;

	index = outarg->offset >> PAGE_CACHE_SHIFT;
	file_size = i_size_read(inode);
	num = outarg->size;
	if (outarg->offset > file_size)
		num = 0;
	else if (outarg->offset + num > file_size)
		num = file_size - outarg->offset;

	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
		struct page *page;
		unsigned int this_num;

		page = find_get_page(mapping, index);
		if (!page)
			break;

		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
		req->pages[req->num_pages] = page;
		req->num_pages++;

		offset = 0;
		num -= this_num;
		total_len += this_num;
		index++;
	}
	req->misc.retrieve_in.offset = outarg->offset;
	req->misc.retrieve_in.size = total_len;
	req->in.args[0].size = sizeof(req->misc.retrieve_in);
	req->in.args[0].value = &req->misc.retrieve_in;
	req->in.args[1].size = total_len;

	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
	if (err)
		fuse_retrieve_end(fc, req);

	return err;
}

static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
				struct fuse_copy_state *cs)
{
	struct fuse_notify_retrieve_out outarg;
	struct inode *inode;
	int err;

	err = -EINVAL;
	if (size != sizeof(outarg))
		goto copy_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto copy_finish;

	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		u64 nodeid = outarg.nodeid;

		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			err = fuse_retrieve(fc, inode, &outarg);
			iput(inode);
		}
	}
	up_read(&fc->killsb);

	return err;

copy_finish:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
		       unsigned int size, struct fuse_copy_state *cs)
{
	switch (code) {
	case FUSE_NOTIFY_POLL:
		return fuse_notify_poll(fc, size, cs);

	case FUSE_NOTIFY_INVAL_INODE:
		return fuse_notify_inval_inode(fc, size, cs);

	case FUSE_NOTIFY_INVAL_ENTRY:
		return fuse_notify_inval_entry(fc, size, cs);

	case FUSE_NOTIFY_STORE:
		return fuse_notify_store(fc, size, cs);

	case FUSE_NOTIFY_RETRIEVE:
		return fuse_notify_retrieve(fc, size, cs);

	case FUSE_NOTIFY_DELETE:
		return fuse_notify_delete(fc, size, cs);

	default:
		fuse_copy_finish(cs);
		return -EINVAL;
	}
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique || req->intr_unique == unique)
			return req;
	}
	return NULL;
}
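
/*
 * Copy the reply arguments from the userspace buffer into the request.
 * A shorter-than-expected last argument is only allowed if the request
 * has argvar set.
 */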
1674 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1675                          unsigned nbytes)
1677         unsigned reqsize = sizeof(struct fuse_out_header);
1679         if (out->h.error)
1680                 return nbytes != reqsize ? -EINVAL : 0;
1682         reqsize += len_args(out->numargs, out->args);
1684         if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1685                 return -EINVAL;
1686         else if (reqsize > nbytes) {
1687                 struct fuse_arg *lastarg = &out->args[out->numargs-1];
1688                 unsigned diffsize = reqsize - nbytes;
1689                 if (diffsize > lastarg->size)
1690                         return -EINVAL;
1691                 lastarg->size -= diffsize;
1692         }
1693         return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1694                               out->page_zeroing);
1697 /*
1698  * Write a single reply to a request.  First the header is copied from
1699  * the write buffer.  The request is then searched on the processing
1700  * list by the unique ID found in the header.  If found, then remove
1701  * it from the list and copy the rest of the buffer to the request.
1702  * The request is finished by calling request_end()
1703  */
1704 static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1705                                  struct fuse_copy_state *cs, size_t nbytes)
1707         int err;
1708         struct fuse_req *req;
1709         struct fuse_out_header oh;
1711         if (nbytes < sizeof(struct fuse_out_header))
1712                 return -EINVAL;
1714         err = fuse_copy_one(cs, &oh, sizeof(oh));
1715         if (err)
1716                 goto err_finish;
1718         err = -EINVAL;
1719         if (oh.len != nbytes)
1720                 goto err_finish;
1722         /*
1723          * Zero oh.unique indicates unsolicited notification message
1724          * and error contains notification code.
1725          */
1726         if (!oh.unique) {
1727                 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1728                 return err ? err : nbytes;
1729         }
1731         err = -EINVAL;
1732         if (oh.error <= -1000 || oh.error > 0)
1733                 goto err_finish;
1735         spin_lock(&fc->lock);
1736         err = -ENOENT;
1737         if (!fc->connected)
1738                 goto err_unlock;
1740         req = request_find(fc, oh.unique);
1741         if (!req)
1742                 goto err_unlock;
1744         if (req->aborted) {
1745                 spin_unlock(&fc->lock);
1746                 fuse_copy_finish(cs);
1747                 spin_lock(&fc->lock);
1748                 request_end(fc, req);
1749                 return -ENOENT;
1750         }
1751         /* Is it an interrupt reply? */
1752         if (req->intr_unique == oh.unique) {
1753                 err = -EINVAL;
1754                 if (nbytes != sizeof(struct fuse_out_header))
1755                         goto err_unlock;
1757                 if (oh.error == -ENOSYS)
1758                         fc->no_interrupt = 1;
1759                 else if (oh.error == -EAGAIN)
1760                         queue_interrupt(fc, req);
1762                 spin_unlock(&fc->lock);
1763                 fuse_copy_finish(cs);
1764                 return nbytes;
1765         }
1767         req->state = FUSE_REQ_WRITING;
1768         list_move(&req->list, &fc->io);
1769         req->out.h = oh;
1770         req->locked = 1;
1771         cs->req = req;
1772         if (!req->out.page_replace)
1773                 cs->move_pages = 0;
1774         spin_unlock(&fc->lock);
1776         err = copy_out_args(cs, &req->out, nbytes);
1777         fuse_copy_finish(cs);
1779         spin_lock(&fc->lock);
1780         req->locked = 0;
1781         if (!err) {
1782                 if (req->aborted)
1783                         err = -ENOENT;
1784         } else if (!req->aborted)
1785                 req->out.h.error = -EIO;
1786         request_end(fc, req);
1788         return err ? err : nbytes;
1790  err_unlock:
1791         spin_unlock(&fc->lock);
1792  err_finish:
1793         fuse_copy_finish(cs);
1794         return err;
1795 }
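/*
 * Example (userspace side): the layout fuse_dev_do_write() expects.  A
 * reply is one write() of a fuse_out_header followed by the request's
 * output arguments: oh.len must equal the number of bytes written,
 * oh.unique must match the request, and oh.error must lie in (-1000, 0].
 * An error reply carries nothing but the header (copy_out_args() rejects
 * anything else), while a reply whose last argument is variable-sized
 * (argvar), e.g. FUSE_READ data, may be shorter than the maximum the
 * kernel asked for.  A minimal sketch; 'fuse_fd', 'unique' and the helper
 * names are assumptions taken from the daemon's read of the request:
 *
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <sys/uio.h>
 *	#include <linux/fuse.h>
 *
 *	// Reply to FUSE_READ with 'count' bytes of file data.
 *	static int reply_read(int fuse_fd, uint64_t unique,
 *			      const void *data, size_t count)
 *	{
 *		struct fuse_out_header oh;
 *		struct iovec iov[2];
 *
 *		oh.len = sizeof(oh) + count;	// total size of this write
 *		oh.error = 0;			// success
 *		oh.unique = unique;		// matches fuse_in_header.unique
 *		iov[0].iov_base = &oh;
 *		iov[0].iov_len = sizeof(oh);
 *		iov[1].iov_base = (void *)data;
 *		iov[1].iov_len = count;
 *		return writev(fuse_fd, iov, 2) < 0 ? -1 : 0;
 *	}
 *
 *	// Fail a request with a negated errno, e.g. reply_err(fd, u, ENOENT).
 *	static int reply_err(int fuse_fd, uint64_t unique, int err)
 *	{
 *		struct fuse_out_header oh = {
 *			.len = sizeof(oh),	// header only, no arguments
 *			.error = -err,
 *			.unique = unique,
 *		};
 *		return write(fuse_fd, &oh, sizeof(oh)) == sizeof(oh) ? 0 : -1;
 *	}
 */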
1797 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1798                               unsigned long nr_segs, loff_t pos)
1799 {
1800         struct fuse_copy_state cs;
1801         struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1802         if (!fc)
1803                 return -EPERM;
1805         fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1807         return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1808 }
1810 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1811                                      struct file *out, loff_t *ppos,
1812                                      size_t len, unsigned int flags)
1813 {
1814         unsigned nbuf;
1815         unsigned idx;
1816         struct pipe_buffer *bufs;
1817         struct fuse_copy_state cs;
1818         struct fuse_conn *fc;
1819         size_t rem;
1820         ssize_t ret;
1822         fc = fuse_get_conn(out);
1823         if (!fc)
1824                 return -EPERM;
1826         bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1827         if (!bufs)
1828                 return -ENOMEM;
1830         pipe_lock(pipe);
1831         nbuf = 0;
1832         rem = 0;
1833         for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1834                 rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1836         ret = -EINVAL;
1837         if (rem < len) {
1838                 pipe_unlock(pipe);
1839                 goto out;
1840         }
1842         rem = len;
1843         while (rem) {
1844                 struct pipe_buffer *ibuf;
1845                 struct pipe_buffer *obuf;
1847                 BUG_ON(nbuf >= pipe->buffers);
1848                 BUG_ON(!pipe->nrbufs);
1849                 ibuf = &pipe->bufs[pipe->curbuf];
1850                 obuf = &bufs[nbuf];
1852                 if (rem >= ibuf->len) {
1853                         *obuf = *ibuf;
1854                         ibuf->ops = NULL;
1855                         pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1856                         pipe->nrbufs--;
1857                 } else {
1858                         ibuf->ops->get(pipe, ibuf);
1859                         *obuf = *ibuf;
1860                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1861                         obuf->len = rem;
1862                         ibuf->offset += obuf->len;
1863                         ibuf->len -= obuf->len;
1864                 }
1865                 nbuf++;
1866                 rem -= obuf->len;
1867         }
1868         pipe_unlock(pipe);
1870         fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1871         cs.pipebufs = bufs;
1872         cs.pipe = pipe;
1874         if (flags & SPLICE_F_MOVE)
1875                 cs.move_pages = 1;
1877         ret = fuse_dev_do_write(fc, &cs, len);
1879         for (idx = 0; idx < nbuf; idx++) {
1880                 struct pipe_buffer *buf = &bufs[idx];
1881                 buf->ops->release(pipe, buf);
1882         }
1883 out:
1884         kfree(bufs);
1885         return ret;
1886 }
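/*
 * Example (userspace side): the splice path above lets a daemon push a
 * reply through a pipe instead of copying it through a write() buffer;
 * with SPLICE_F_MOVE the kernel may move the pipe pages directly
 * (cs.move_pages above).  A rough sketch that ignores short transfers;
 * 'fuse_fd', 'iov' (header plus payload) and 'total' are assumptions:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/uio.h>
 *
 *	static int reply_spliced(int fuse_fd, struct iovec *iov, int cnt,
 *				 size_t total)
 *	{
 *		int pfd[2];
 *		int ret = -1;
 *
 *		if (pipe(pfd) < 0)
 *			return -1;
 *		// Stuff the reply into the pipe, then splice it to /dev/fuse.
 *		if (vmsplice(pfd[1], iov, cnt, 0) == (ssize_t)total &&
 *		    splice(pfd[0], NULL, fuse_fd, NULL, total,
 *			   SPLICE_F_MOVE) == (ssize_t)total)
 *			ret = 0;
 *		close(pfd[0]);
 *		close(pfd[1]);
 *		return ret;
 *	}
 */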
1888 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1889 {
1890         unsigned mask = POLLOUT | POLLWRNORM;
1891         struct fuse_conn *fc = fuse_get_conn(file);
1892         if (!fc)
1893                 return POLLERR;
1895         poll_wait(file, &fc->waitq, wait);
1897         spin_lock(&fc->lock);
1898         if (!fc->connected)
1899                 mask = POLLERR;
1900         else if (request_pending(fc))
1901                 mask |= POLLIN | POLLRDNORM;
1902         spin_unlock(&fc->lock);
1904         return mask;
1905 }
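/*
 * Example (userspace side): per fuse_dev_poll() above the device is
 * always writable and becomes readable once a request is queued, so a
 * daemon's event loop can poll before reading.  A minimal sketch;
 * 'fuse_fd' and the helper name are assumptions:
 *
 *	#include <poll.h>
 *
 *	// Block until at least one request is queued on the connection.
 *	static int wait_for_request(int fuse_fd)
 *	{
 *		struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };
 *
 *		if (poll(&pfd, 1, -1) < 0)
 *			return -1;
 *		if (pfd.revents & POLLERR)
 *			return -1;	// connection aborted or released
 *		return (pfd.revents & POLLIN) ? 1 : 0;
 *	}
 */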
1907 /*
1908  * Abort all requests on the given list (pending or processing)
1909  *
1910  * This function releases and reacquires fc->lock
1911  */
1912 static void end_requests(struct fuse_conn *fc, struct list_head *head)
1913 __releases(fc->lock)
1914 __acquires(fc->lock)
1915 {
1916         while (!list_empty(head)) {
1917                 struct fuse_req *req;
1918                 req = list_entry(head->next, struct fuse_req, list);
1919                 req->out.h.error = -ECONNABORTED;
1920                 request_end(fc, req);
1921                 spin_lock(&fc->lock);
1922         }
1923 }
1925 /*
1926  * Abort requests under I/O
1927  *
1928  * The requests are set to aborted and finished, and the request
1929  * waiter is woken up.  This will make request_wait_answer() wait
1930  * until the request is unlocked and then return.
1931  *
1932  * If the request is asynchronous, then the end function needs to be
1933  * called after waiting for the request to be unlocked (if it was
1934  * locked).
1935  */
1936 static void end_io_requests(struct fuse_conn *fc)
1937 __releases(fc->lock)
1938 __acquires(fc->lock)
1939 {
1940         while (!list_empty(&fc->io)) {
1941                 struct fuse_req *req =
1942                         list_entry(fc->io.next, struct fuse_req, list);
1943                 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1945                 req->aborted = 1;
1946                 req->out.h.error = -ECONNABORTED;
1947                 req->state = FUSE_REQ_FINISHED;
1948                 list_del_init(&req->list);
1949                 wake_up(&req->waitq);
1950                 if (end) {
1951                         req->end = NULL;
1952                         __fuse_get_request(req);
1953                         spin_unlock(&fc->lock);
1954                         wait_event(req->waitq, !req->locked);
1955                         end(fc, req);
1956                         fuse_put_request(fc, req);
1957                         spin_lock(&fc->lock);
1958                 }
1959         }
1960 }
1962 static void end_queued_requests(struct fuse_conn *fc)
1963 __releases(fc->lock)
1964 __acquires(fc->lock)
1965 {
1966         fc->max_background = UINT_MAX;
1967         flush_bg_queue(fc);
1968         end_requests(fc, &fc->pending);
1969         end_requests(fc, &fc->processing);
1970         while (forget_pending(fc))
1971                 kfree(dequeue_forget(fc, 1, NULL));
1972 }
1974 static void end_polls(struct fuse_conn *fc)
1975 {
1976         struct rb_node *p;
1978         p = rb_first(&fc->polled_files);
1980         while (p) {
1981                 struct fuse_file *ff;
1982                 ff = rb_entry(p, struct fuse_file, polled_node);
1983                 wake_up_interruptible_all(&ff->poll_wait);
1985                 p = rb_next(p);
1986         }
1987 }
1989 /*
1990  * Abort all requests.
1991  *
1992  * Emergency exit in case of a malicious or accidental deadlock, or
1993  * just a hung filesystem.
1994  *
1995  * The same effect is usually achievable through killing the
1996  * filesystem daemon and all users of the filesystem.  The exception
1997  * is the combination of an asynchronous request and the tricky
1998  * deadlock (see Documentation/filesystems/fuse.txt).
1999  *
2000  * During the aborting, progression of requests from the pending and
2001  * processing lists onto the io list, and progression of new requests
2002  * onto the pending list is prevented by fc->connected being false.
2003  *
2004  * Progression of requests under I/O to the processing list is
2005  * prevented by the req->aborted flag being true for these requests.
2006  * For this reason requests on the io list must be aborted first.
2007  */
2008 void fuse_abort_conn(struct fuse_conn *fc)
2009 {
2010         spin_lock(&fc->lock);
2011         if (fc->connected) {
2012                 fc->connected = 0;
2013                 fc->blocked = 0;
2014                 end_io_requests(fc);
2015                 end_queued_requests(fc);
2016                 end_polls(fc);
2017                 wake_up_all(&fc->waitq);
2018                 wake_up_all(&fc->blocked_waitq);
2019                 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2020         }
2021         spin_unlock(&fc->lock);
2022 }
2023 EXPORT_SYMBOL_GPL(fuse_abort_conn);
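/*
 * Example (userspace side): besides running on unmount, this abort can be
 * requested through the fusectl filesystem (conventionally mounted at
 * /sys/fs/fuse/connections, see Documentation/filesystems/fuse.txt);
 * writing to a connection's "abort" file ends up in fuse_abort_conn().
 * A sketch under those assumptions, with 'dev' being the connection
 * number shown by fusectl:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int abort_fuse_conn(unsigned int dev)
 *	{
 *		char path[64];
 *		int fd, ret;
 *
 *		snprintf(path, sizeof(path),
 *			 "/sys/fs/fuse/connections/%u/abort", dev);
 *		fd = open(path, O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, "1", 1) == 1 ? 0 : -1;	// any write aborts
 *		close(fd);
 *		return ret;
 *	}
 */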
2025 int fuse_dev_release(struct inode *inode, struct file *file)
2026 {
2027         struct fuse_conn *fc = fuse_get_conn(file);
2028         if (fc) {
2029                 spin_lock(&fc->lock);
2030                 fc->connected = 0;
2031                 fc->blocked = 0;
2032                 end_queued_requests(fc);
2033                 end_polls(fc);
2034                 wake_up_all(&fc->blocked_waitq);
2035                 spin_unlock(&fc->lock);
2036                 fuse_conn_put(fc);
2037         }
2039         return 0;
2040 }
2041 EXPORT_SYMBOL_GPL(fuse_dev_release);
2043 static int fuse_dev_fasync(int fd, struct file *file, int on)
2044 {
2045         struct fuse_conn *fc = fuse_get_conn(file);
2046         if (!fc)
2047                 return -EPERM;
2049         /* No locking - fasync_helper does its own locking */
2050         return fasync_helper(fd, file, on, &fc->fasync);
2051 }
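/*
 * Example (userspace side): once fasync is set up on the device, the
 * kernel delivers SIGIO to the daemon (see the kill_fasync() calls on
 * fc->fasync, e.g. in fuse_abort_conn() above and in the request
 * queueing code earlier in this file), so a daemon can be signal-driven
 * rather than blocking in read().  A minimal sketch; 'fuse_fd' and the
 * helper name are assumptions:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int enable_sigio(int fuse_fd)
 *	{
 *		if (fcntl(fuse_fd, F_SETOWN, getpid()) < 0)
 *			return -1;
 *		// O_ASYNC calls the driver's ->fasync (fuse_dev_fasync above)
 *		return fcntl(fuse_fd, F_SETFL,
 *			     fcntl(fuse_fd, F_GETFL) | O_ASYNC);
 *	}
 */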
2053 const struct file_operations fuse_dev_operations = {
2054         .owner          = THIS_MODULE,
2055         .llseek         = no_llseek,
2056         .read           = do_sync_read,
2057         .aio_read       = fuse_dev_read,
2058         .splice_read    = fuse_dev_splice_read,
2059         .write          = do_sync_write,
2060         .aio_write      = fuse_dev_write,
2061         .splice_write   = fuse_dev_splice_write,
2062         .poll           = fuse_dev_poll,
2063         .release        = fuse_dev_release,
2064         .fasync         = fuse_dev_fasync,
2065 };
2066 EXPORT_SYMBOL_GPL(fuse_dev_operations);
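/*
 * Example (userspace side): these operations back the /dev/fuse character
 * device registered below.  A daemon (normally via the privileged
 * fusermount helper) opens it and passes the descriptor to mount(2) with
 * the "fd=" option; the other options shown are the ones the fuse mount
 * code expects, with example values only.  A sketch; 'mnt', "mydaemon"
 * and the helper name are assumptions:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/mount.h>
 *
 *	static int mount_fuse(const char *mnt)
 *	{
 *		char opts[128];
 *		int fuse_fd = open("/dev/fuse", O_RDWR);
 *
 *		if (fuse_fd < 0)
 *			return -1;
 *		snprintf(opts, sizeof(opts),
 *			 "fd=%d,rootmode=40000,user_id=%d,group_id=%d",
 *			 fuse_fd, (int)getuid(), (int)getgid());
 *		if (mount("mydaemon", mnt, "fuse", 0, opts) < 0) {
 *			close(fuse_fd);
 *			return -1;
 *		}
 *		return fuse_fd;	// now serve requests on this descriptor
 *	}
 */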
2068 static struct miscdevice fuse_miscdevice = {
2069         .minor = FUSE_MINOR,
2070         .name  = "fuse",
2071         .fops = &fuse_dev_operations,
2072 };
2074 int __init fuse_dev_init(void)
2075 {
2076         int err = -ENOMEM;
2077         fuse_req_cachep = kmem_cache_create("fuse_request",
2078                                             sizeof(struct fuse_req),
2079                                             0, 0, NULL);
2080         if (!fuse_req_cachep)
2081                 goto out;
2083         err = misc_register(&fuse_miscdevice);
2084         if (err)
2085                 goto out_cache_clean;
2087         return 0;
2089  out_cache_clean:
2090         kmem_cache_destroy(fuse_req_cachep);
2091  out:
2092         return err;
2093 }
2095 void fuse_dev_cleanup(void)
2096 {
2097         misc_deregister(&fuse_miscdevice);
2098         kmem_cache_destroy(fuse_req_cachep);
2099 }