diff options
Diffstat (limited to 'fs')
105 files changed, 2847 insertions, 886 deletions
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index eb82ee53ee0b..d9a43674cb94 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,9 +125,8 @@ static void affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; @@ -591,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; - struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { + hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { /* * RCU protects us against accessing freed memory but * we have to be careful not to get a reference when the diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 230bd2aad4f4..9bd16255dd9c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -383,8 +383,10 @@ static struct vfsmount *autofs4_d_automount(struct path *path) goto done; } } else { - if (!simple_empty(dentry)) + if (!simple_empty(dentry)) { + spin_unlock(&sbi->fs_lock); goto done; + } } ino->flags |= AUTOFS_INF_PENDING; spin_unlock(&sbi->fs_lock); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 03bc1d347d8e..3db70dae40d3 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -42,10 +42,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) while (wq) { nwq = wq->next; wq->status = -ENOENT; /* Magic is gone - report failure */ - if (wq->name.name) { - kfree(wq->name.name); - wq->name.name = NULL; - } + kfree(wq->name.name); + wq->name.name = NULL; wq->wait_ctr--; wake_up_interruptible(&wq->queue); wq = nwq; @@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; + 
trace_block_bio_complete(bio, error); + if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 53f5fae5cfbe..aea605c98ba6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; @@ -1118,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret && !bdev->bd_openers) { + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); if (bdi == NULL) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1e59ed575cc9..cf54bdfee334 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root, return 0; } -static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, - unsigned long nr_pages, - enum wb_reason reason) -{ - if (!writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb_nr(sb, nr_pages, reason); - up_read(&sb->s_umount); - return 1; - } - - return 0; -} - /* * shrink metadata reservation for delalloc */ @@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; - writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, - nr_pages, - WB_REASON_FS_FREE_SPACE); + try_to_writeback_inodes_sb_nr(root->fs_info->sb, + nr_pages, + WB_REASON_FS_FREE_SPACE); /* * We need to wait for the async pages to actually start before diff --git a/fs/buffer.c b/fs/buffer.c index 8e18281b4077..b4dcb34c9635 100644 --- 
a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> +#include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) } EXPORT_SYMBOL(init_buffer); +inline void touch_buffer(struct buffer_head *bh) +{ + trace_block_touch_buffer(bh); + mark_page_accessed(bh->b_page); +} +EXPORT_SYMBOL(touch_buffer); + static int sleep_on_buffer(void *word) { io_schedule(); @@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); + trace_block_dirty_buffer(bh); + /* * Very *carefully* optimize the it-is-already-dirty case. * diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d4f81edd9a5d..a60ea977af6f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - int rc, bytes; + int rc = req->r_result; + int bytes = le32_to_cpu(msg->hdr.data_len); int i; - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - rc = le32_to_cpu(replyhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); - dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ @@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, 
ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0, 0, true); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); @@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; struct ceph_inode_info *ci = ceph_inode(inode); unsigned wrote; struct page *page; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - __s32 rc = -EIO; - u64 bytes = 0; + int rc = req->r_result; + u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - if (rc >= 0) { /* * Assume we wrote the pages we originally sent. 
The @@ -741,8 +725,6 @@ retry: struct page *page; int want; u64 offset, len; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_op *op; long writeback_stat; next = 0; @@ -838,7 +820,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); @@ -906,10 +888,8 @@ get_more_pages: /* revise final length, page count */ req->r_num_pages = locked_pages; - reqhead = req->r_request->front.iov_base; - op = (void *)(reqhead + 1); - op->extent.length = cpu_to_le64(len); - op->payload_len = cpu_to_le32(len); + req->r_request_ops[0].extent.length = cpu_to_le64(len); + req->r_request_ops[0].payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ae2be696eb5b..78e2f575247d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -611,8 +611,16 @@ retry: if (flags & CEPH_CAP_FLAG_AUTH) ci->i_auth_cap = cap; - else if (ci->i_auth_cap == cap) + else if (ci->i_auth_cap == cap) { ci->i_auth_cap = NULL; + spin_lock(&mdsc->cap_dirty_lock); + if (!list_empty(&ci->i_dirty_item)) { + dout(" moving %p to cap_dirty_migrating\n", inode); + list_move(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); + } + spin_unlock(&mdsc->cap_dirty_lock); + } dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), cap, ceph_cap_string(issued), @@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; - int file_wanted, used; + int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list @@ -1563,9 +1571,14 @@ retry_locked: /* NOTE: 
no side-effects allowed, until we take s_mutex */ + cap_used = used; + if (ci->i_auth_cap && cap != ci->i_auth_cap) + cap_used &= ~ci->i_auth_cap->issued; + revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap_used), ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); @@ -1593,7 +1606,7 @@ retry_locked: } /* completed revocation? going down and there are no caps? */ - if (revoking && (revoking & used) == 0) { + if (revoking && (revoking & cap_used) == 0) { dout("completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; @@ -1670,8 +1683,8 @@ ack: sent++; /* __send_cap drops i_ceph_lock */ - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, - retain, flushing, NULL); + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, + want, retain, flushing, NULL); goto retry; /* retake i_ceph_lock and restart our cap scan. 
*/ } @@ -2417,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, dout("mds wanted %s -> %s\n", ceph_cap_string(le32_to_cpu(grant->wanted)), ceph_cap_string(wanted)); - grant->wanted = cpu_to_le32(wanted); + /* imported cap may not have correct mds_wanted */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) + check_caps = 1; } cap->seq = seq; @@ -2821,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); + if (op == CEPH_CAP_OP_IMPORT) + ceph_add_cap_releases(mdsc, session); + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 11b57c2c8f15..bf338d9b67e3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); + if (err) + goto out_err; + err = ceph_handle_snapdir(req, dentry, err); if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); @@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = finish_no_open(file, dn); } else { dout("atomic_open finish_open on dn %p\n", dn); + if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { + *opened |= FILE_CREATED; + } err = finish_open(file, dentry, ceph_open, opened); } @@ -535,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index f5ed767806df..4a989345b37b 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; 
u64 len = 1, olen; u64 tmp; - struct ceph_object_layout ol; struct ceph_pg pgid; int r; @@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) @@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, + ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid = ol.ol_pgid; dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7a3dfe0a9a80..442880d099c9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -233,6 +233,30 @@ bad: } /* + * parse create results + */ +static int parse_reply_info_create(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (features & CEPH_FEATURE_REPLY_CREATE_INODE) { + if (*p == end) { + info->has_create_ino = false; + } else { + info->has_create_ino = true; + info->ino = ceph_decode_64(p); + } + } + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* * parse extra results */ static int parse_reply_info_extra(void **p, void *end, @@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end, { if (info->head->op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else + else if (info->head->op == CEPH_MDS_OP_READDIR) return parse_reply_info_dir(p, end, info, features); + else if (info->head->op == CEPH_MDS_OP_CREATE) + return parse_reply_info_create(p, end, info, features); + else + return -EIO; } /* @@ -2170,7 +2198,8 
@@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || + req->r_op == CEPH_MDS_OP_LSSNAP) && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ff4188bf6199..c2a19fbbe517 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_info_in *dir_in; u8 dir_complete, dir_end; }; + + /* for create results */ + struct { + bool has_create_ino; + u64 ino; + }; }; /* encoded blob describing snapshot contexts for certain diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 73b7d44e8a35..0d3c9240c61b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); ceph_decode_16_safe(p, end, version, bad); + if (version > 3) { + pr_warning("got mdsmap version %d > 3, failing", version); + goto bad; + } ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); m->m_epoch = ceph_decode_32(p); @@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); m->m_num_data_pg_pools = n; - m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); + m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); if (!m->m_data_pg_pools) goto badmem; - ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); + ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); for (i = 0; i < n; i++) - m->m_data_pg_pools[i] = ceph_decode_32(p); - m->m_cas_pg_pool = ceph_decode_32(p); + m->m_data_pg_pools[i] = ceph_decode_64(p); + m->m_cas_pg_pool = ceph_decode_64(p); /* ok, we don't care about the rest. 
*/ dout("mdsmap_decode success epoch %u\n", m->m_epoch); diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index cd5097d7c804..89fa4a940a0f 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s) case CEPH_MDS_STATE_BOOT: return "up:boot"; case CEPH_MDS_STATE_STANDBY: return "up:standby"; case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; + case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay"; case CEPH_MDS_STATE_CREATING: return "up:creating"; case CEPH_MDS_STATE_STARTING: return "up:starting"; /* up and in */ @@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUP: return "lookup"; case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; + case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; case CEPH_MDS_OP_RMXATTR: return "rmxattr"; + case CEPH_MDS_OP_SETLAYOUT: return "setlayou"; + case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout"; case CEPH_MDS_OP_READDIR: return "readdir"; case CEPH_MDS_OP_MKNOD: return "mknod"; case CEPH_MDS_OP_LINK: return "link"; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e86aa9948124..9fe17c6c2876 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! 
*/ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f053bbd1886f..c7b309723dcc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ @@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); /* file.c */ extern const struct file_operations ceph_file_fops; extern const struct address_space_operations ceph_aops; -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); + extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2c2ae5be9902..9b6b2b6dd164 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -29,9 +29,94 @@ struct ceph_vxattr { size_t name_size; /* strlen(name) + 1 (for '\0') */ size_t (*getxattr_cb)(struct ceph_inode_info 
*ci, char *val, size_t size); - bool readonly; + bool readonly, hidden; + bool (*exists_cb)(struct ceph_inode_info *ci); }; +/* layouts */ + +static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) +{ + size_t s; + char *p = (char *)&ci->i_layout; + + for (s = 0; s < sizeof(ci->i_layout); s++, p++) + if (*p) + return true; + return false; +} + +static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + pool_name); + else + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + (unsigned long long)pool); + + up_read(&osdc->map_sem); + return ret; +} + +static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, + char *val, 
size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, + char *val, size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, "%s", pool_name); + else + ret = snprintf(val, size, "%lld", (unsigned long long)pool); + up_read(&osdc->map_sem); + return ret; +} + /* directories */ static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, @@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } -#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name -#define XATTR_NAME_CEPH(_type, _name) \ - { \ - .name = CEPH_XATTR_NAME(_type, _name), \ - .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ - .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = true, \ - } +#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name +#define CEPH_XATTR_NAME2(_type, _name, _name2) \ + XATTR_CEPH_PREFIX #_type "." #_name "." 
#_name2 + +#define XATTR_NAME_CEPH(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = true, \ + .hidden = false, \ + .exists_cb = NULL, \ + } +#define XATTR_LAYOUT_FIELD(_type, _name, _field) \ + { \ + .name = CEPH_XATTR_NAME2(_type, _name, _field), \ + .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ + .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_layout_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { + { + .name = "ceph.dir.layout", + .name_size = sizeof("ceph.dir.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, + }, + XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), + XATTR_LAYOUT_FIELD(dir, layout, stripe_count), + XATTR_LAYOUT_FIELD(dir, layout, object_size), + XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_NAME_CEPH(dir, entries), XATTR_NAME_CEPH(dir, files), XATTR_NAME_CEPH(dir, subdirs), @@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), - { 0 } /* Required table terminator */ + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ /* files */ -static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, - size_t size) -{ - int ret; - - ret = snprintf(val, size, - "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n", - (unsigned long long)ceph_file_layout_su(ci->i_layout), - (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), - (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); - return ret; -} - static struct ceph_vxattr ceph_file_vxattrs[] = { - XATTR_NAME_CEPH(file, layout), - /* The following 
extended attribute name is deprecated */ { - .name = XATTR_CEPH_PREFIX "layout", - .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), - .getxattr_cb = ceph_vxattrcb_file_layout, - .readonly = true, + .name = "ceph.file.layout", + .name_size = sizeof("ceph.file.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, }, - { 0 } /* Required table terminator */ + XATTR_LAYOUT_FIELD(file, layout, stripe_unit), + XATTR_LAYOUT_FIELD(file, layout, stripe_count), + XATTR_LAYOUT_FIELD(file, layout, object_size), + XATTR_LAYOUT_FIELD(file, layout, pool), + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ @@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) size_t size = 0; for (vxattr = vxattrs; vxattr->name; vxattr++) - size += vxattr->name_size; + if (!vxattr->hidden) + size += vxattr->name_size; return size; } @@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - /* let's see if a virtual xattr was requested */ - vxattr = ceph_match_vxattr(inode, name); - spin_lock(&ci->i_ceph_lock); dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); + /* let's see if a virtual xattr was requested */ + vxattr = ceph_match_vxattr(inode, name); + if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { + err = vxattr->getxattr_cb(ci, value, size); + goto out; + } + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; @@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, spin_lock(&ci->i_ceph_lock); - if (vxattr && vxattr->readonly) { - err = vxattr->getxattr_cb(ci, value, size); - goto out; - } - err = __build_xattrs(inode); if (err 
< 0) goto out; @@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, get_xattr: err = -ENODATA; /* == ENOATTR */ xattr = __get_xattr(ci, name); - if (!xattr) { - if (vxattr) - err = vxattr->getxattr_cb(ci, value, size); + if (!xattr) goto out; - } err = -ERANGE; if (size && size < xattr->val_len) @@ -664,23 +763,30 @@ list_xattr: vir_namelen = ceph_vxattrs_name_size(vxattrs); /* adding 1 byte per each variable due to the null termination */ - namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; + namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; err = -ERANGE; - if (size && namelen > size) + if (size && vir_namelen + namelen > size) goto out; - err = namelen; + err = namelen + vir_namelen; if (size == 0) goto out; names = __copy_xattr_names(ci, names); /* virtual xattr names, too */ - if (vxattrs) + err = namelen; + if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - len = sprintf(names, "%s", vxattrs[i].name); - names += len + 1; + if (!vxattrs[i].hidden && + !(vxattrs[i].exists_cb && + !vxattrs[i].exists_cb(ci))) { + len = sprintf(names, "%s", vxattrs[i].name); + names += len + 1; + err += len + 1; + } } + } out: spin_unlock(&ci->i_ceph_lock); @@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + /* preallocate memory for xattr name, value, index node */ err = -ENOMEM; newname = kmemdup(name, name_len + 1, GFP_NOFS); @@ -838,6 +948,7 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_sync_setxattr(dentry, name, value, size, flags); out: kfree(newname); @@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name) if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ 
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: @@ -931,6 +1046,7 @@ retry: return err; do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_send_removexattr(dentry, name); out: return err; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d2a833999bcc..83f2606c76d0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -816,10 +816,9 @@ static bool inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/coredump.c b/fs/coredump.c index 69baf903d3bd..c6479658d487 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). 
*/ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/fs/dcache.c b/fs/dcache.c index 68220dd0c135..fbfae008ba44 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent); static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; - struct hlist_node *p; again: discon_alias = NULL; - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias); void d_prune_aliases(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -1443,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - struct hlist_node *p; if (!inode) { __d_instantiate(entry, NULL); return NULL; } - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index f7501651762d..1b1146670c4b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1183,7 +1183,7 @@ static void detach_lkb(struct dlm_lkb *lkb) static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; - int rv, id; + int rv; lkb = 
dlm_allocate_lkb(ls); if (!lkb) @@ -1199,19 +1199,13 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) mutex_init(&lkb->lkb_cb_mutex); INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); - retry: - rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS); - if (!rv) - return -ENOMEM; - + idr_preload(GFP_NOFS); spin_lock(&ls->ls_lkbidr_spin); - rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); - if (!rv) - lkb->lkb_id = id; + rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT); + if (rv >= 0) + lkb->lkb_id = rv; spin_unlock(&ls->ls_lkbidr_spin); - - if (rv == -EAGAIN) - goto retry; + idr_preload_end(); if (rv < 0) { log_error(ls, "create_lkb idr error %d", rv); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 2e99fb0c9737..3ca79d3253b9 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -796,7 +796,6 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); - idr_remove_all(&ls->ls_lkbidr); idr_destroy(&ls->ls_lkbidr); /* diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index dd87a31bcc21..4f5ad246582f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid) static struct connection *__find_con(int nodeid) { int r; - struct hlist_node *h; struct connection *con; r = nodeid_hash(nodeid); - hlist_for_each_entry(con, h, &connection_hash[r], list) { + hlist_for_each_entry(con, &connection_hash[r], list) { if (con->nodeid == nodeid) return con; } @@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) static void foreach_conn(void (*conn_func)(struct connection *c)) { int i; - struct hlist_node *h, *n; + struct hlist_node *n; struct connection *con; for (i = 0; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ + hlist_for_each_entry_safe(con, n, &connection_hash[i], list) conn_func(con); - } } } @@ -257,13 +255,12 @@ static struct connection 
*nodeid2con(int nodeid, gfp_t allocation) static struct connection *assoc2con(int assoc_id) { int i; - struct hlist_node *h; struct connection *con; mutex_lock(&connections_lock); for (i = 0 ; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry(con, h, &connection_hash[i], list) { + hlist_for_each_entry(con, &connection_hash[i], list) { if (con->sctp_assoc == assoc_id) { mutex_unlock(&connections_lock); return con; diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index aedea28a86a1..a6bc63f6e31b 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -305,27 +305,26 @@ static int recover_idr_empty(struct dlm_ls *ls) static int recover_idr_add(struct dlm_rsb *r) { struct dlm_ls *ls = r->res_ls; - int rv, id; - - rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS); - if (!rv) - return -ENOMEM; + int rv; + idr_preload(GFP_NOFS); spin_lock(&ls->ls_recover_idr_lock); if (r->res_id) { - spin_unlock(&ls->ls_recover_idr_lock); - return -1; - } - rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id); - if (rv) { - spin_unlock(&ls->ls_recover_idr_lock); - return rv; + rv = -1; + goto out_unlock; } - r->res_id = id; + rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT); + if (rv < 0) + goto out_unlock; + + r->res_id = rv; ls->ls_recover_list_count++; dlm_hold_rsb(r); + rv = 0; +out_unlock: spin_unlock(&ls->ls_recover_idr_lock); - return 0; + idr_preload_end(); + return rv; } static void recover_idr_del(struct dlm_rsb *r) @@ -351,24 +350,21 @@ static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) return r; } -static int recover_idr_clear_rsb(int id, void *p, void *data) +static void recover_idr_clear(struct dlm_ls *ls) { - struct dlm_ls *ls = data; - struct dlm_rsb *r = p; + struct dlm_rsb *r; + int id; - r->res_id = 0; - r->res_recover_locks_count = 0; - ls->ls_recover_list_count--; + spin_lock(&ls->ls_recover_idr_lock); - dlm_put_rsb(r); - return 0; -} + idr_for_each_entry(&ls->ls_recover_idr, r, id) { + idr_remove(&ls->ls_recover_idr, id); + r->res_id = 0; 
+ r->res_recover_locks_count = 0; + ls->ls_recover_list_count--; -static void recover_idr_clear(struct dlm_ls *ls) -{ - spin_lock(&ls->ls_recover_idr_lock); - idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls); - idr_remove_all(&ls->ls_recover_idr); + dlm_put_rsb(r); + } if (ls->ls_recover_list_count != 0) { log_error(ls, "warning: recover_list_count %d", diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 5fa2471796c2..8d7a577ae497 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) */ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) { - struct hlist_node *elem; int rc; - hlist_for_each_entry(*daemon, elem, + hlist_for_each_entry(*daemon, &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], euid_chain) { if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) { @@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void) mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } if (ecryptfs_daemon_hash) { - struct hlist_node *elem; struct ecryptfs_daemon *daemon; int i; @@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void) for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; - hlist_for_each_entry(daemon, elem, + hlist_for_each_entry(daemon, &ecryptfs_daemon_hash[i], euid_chain) { rc = ecryptfs_exorcise_daemon(daemon); diff --git a/fs/exec.c b/fs/exec.c index 864c50df660a..a96a4885bbbf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) - set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); + set_dumpable(current->mm, SUID_DUMP_USER); else set_dumpable(current->mm, suid_dumpable); @@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt); void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case SUID_DUMPABLE_DISABLED: + case 
SUID_DUMP_DISABLE: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_ENABLED: + case SUID_DUMP_USER: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_SAFE: + case SUID_DUMP_ROOT: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; + return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } int get_dumpable(struct mm_struct *mm) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 5df4bb4aab14..262fc9940982 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result, { struct dentry *dentry, *toput = NULL; struct inode *inode; - struct hlist_node *p; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c4f4b1c97f8..9ea0cde3fa9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb) /* * Start pushing delalloc when 1/2 of free blocks are dirty. 
*/ - if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && - !writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); - up_read(&sb->s_umount); - } + if (dirty_blocks && (free_blocks < 2 * dirty_blocks)) + try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 12701a567752..e9cc3f0d58e2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -95,6 +95,8 @@ struct msdos_sb_info { spinlock_t dir_hash_lock; struct hlist_head dir_hashtable[FAT_HASH_SIZE]; + + unsigned int dirty; /* fs state before mount */ }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f8f491677a4a..acf6e479b443 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; spin_lock(&sbi->inode_hash_lock); - hlist_for_each_entry(i, _p, head, i_fat_hash) { + hlist_for_each_entry(i, head, i_fat_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_pos != i_pos) continue; @@ -488,10 +487,59 @@ static void fat_evict_inode(struct inode *inode) fat_detach(inode); } +static void fat_set_state(struct super_block *sb, + unsigned int set, unsigned int force) +{ + struct buffer_head *bh; + struct fat_boot_sector *b; + struct msdos_sb_info *sbi = sb->s_fs_info; + + /* do not change any thing if mounted read only */ + if ((sb->s_flags & MS_RDONLY) && !force) + return; + + /* do not change state if fs was dirty */ + if (sbi->dirty) { + /* warn only on set (mount). */ + if (set) + fat_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. 
" + "Please run fsck."); + return; + } + + bh = sb_bread(sb, 0); + if (bh == NULL) { + fat_msg(sb, KERN_ERR, "unable to read boot sector " + "to mark fs as dirty"); + return; + } + + b = (struct fat_boot_sector *) bh->b_data; + + if (sbi->fat_bits == 32) { + if (set) + b->fat32.state |= FAT_STATE_DIRTY; + else + b->fat32.state &= ~FAT_STATE_DIRTY; + } else /* fat 16 and 12 */ { + if (set) + b->fat16.state |= FAT_STATE_DIRTY; + else + b->fat16.state &= ~FAT_STATE_DIRTY; + } + + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + brelse(bh); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + fat_set_state(sb, 0, 0); + iput(sbi->fsinfo_inode); iput(sbi->fat_inode); @@ -566,8 +614,18 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { + int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + + /* make sure we update state on remount. 
*/ + new_rdonly = *flags & MS_RDONLY; + if (new_rdonly != (sb->s_flags & MS_RDONLY)) { + if (new_rdonly) + fat_set_state(sb, 0, 0); + else + fat_set_state(sb, 1, 1); + } return 0; } @@ -1298,17 +1356,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->prev_free = FAT_START_ENT; sb->s_maxbytes = 0xffffffff; - if (!sbi->fat_length && b->fat32_length) { + if (!sbi->fat_length && b->fat32.length) { struct fat_boot_fsinfo *fsinfo; struct buffer_head *fsinfo_bh; /* Must be FAT32 */ sbi->fat_bits = 32; - sbi->fat_length = le32_to_cpu(b->fat32_length); - sbi->root_cluster = le32_to_cpu(b->root_cluster); + sbi->fat_length = le32_to_cpu(b->fat32.length); + sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster); /* MC - if info_sector is 0, don't multiply by 0 */ - sbi->fsinfo_sector = le16_to_cpu(b->info_sector); + sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector); if (sbi->fsinfo_sector == 0) sbi->fsinfo_sector = 1; @@ -1362,6 +1420,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (sbi->fat_bits != 32) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; + /* some OSes set FAT_STATE_DIRTY and clean it on unmount. 
*/ + if (sbi->fat_bits == 32) + sbi->dirty = b->fat32.state & FAT_STATE_DIRTY; + else /* fat 16 or 12 */ + sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; + /* check that FAT table does not overflow */ fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); @@ -1456,6 +1520,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, "the device does not support discard"); } + fat_set_state(sb, 1, 0); return 0; out_invalid: diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index ef4b5faba87b..499c10438ca2 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head; - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; head = sbi->dir_hashtable + fat_dir_hash(i_logstart); spin_lock(&sbi->dir_hash_lock); - hlist_for_each_entry(i, _p, head, i_dir_hash) { + hlist_for_each_entry(i, head, i_dir_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_logstart != i_logstart) continue; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 310972b72a66..21f46fb3a101 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) static int write_inode(struct inode *inode, struct writeback_control *wbc) { - if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, wbc); + int ret; + + if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { + trace_writeback_write_inode_start(inode, wbc); + ret = inode->i_sb->s_op->write_inode(inode, wbc); + trace_writeback_write_inode(inode, wbc); + return ret; + } return 0; } @@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(!(inode->i_state & I_SYNC)); + 
trace_writeback_single_inode_start(inode, wbc, nr_to_write); + ret = do_writepages(mapping, wbc); /* @@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) * dirty the inode itself */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + trace_writeback_dirty_inode_start(inode, flags); + if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags); + + trace_writeback_dirty_inode(inode, flags); } /* @@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) EXPORT_SYMBOL(writeback_inodes_sb); /** - * writeback_inodes_sb_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb_nr - try to start writeback if none underway * @sb: the superblock - * @reason: reason why some writeback work was initiated + * @nr: the number of pages to write + * @reason: the reason of writeback * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) +int try_to_writeback_inodes_sb_nr(struct super_block *sb, + unsigned long nr, + enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb(sb, reason); - up_read(&sb->s_umount); + if (writeback_in_progress(sb->s_bdi)) return 1; - } else + + if (!down_read_trylock(&sb->s_umount)) return 0; + + writeback_inodes_sb_nr(sb, nr, reason); + up_read(&sb->s_umount); + return 1; } -EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr); /** - * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock - * @nr: the number of pages to write * @reason: reason why some writeback work was initiated * - * Invoke writeback_inodes_sb if no writeback is currently underway. 
+ * Implement by try_to_writeback_inodes_sb_nr() * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_nr_if_idle(struct super_block *sb, - unsigned long nr, - enum wb_reason reason) +int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb_nr(sb, nr, reason); - up_read(&sb->s_umount); - return 1; - } else - return 0; + return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } -EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 8dcb114758e3..e2cba1f60c21 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache, struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_n; int ret; _enter("%p,%p{%s}", cache, cookie, cookie->def->name); spin_lock(&cookie->lock); - hlist_for_each_entry(object, _n, &cookie->backing_objects, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (object->cache == cache) goto object_already_extant; @@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie, { struct fscache_object *p; struct fscache_cache *cache = object->cache; - struct hlist_node *_n; int ret; _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); @@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* there may be multiple initial creations of this object, but we only * want one */ ret = -EEXIST; - hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { + hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) ret = -ENOBUFS; @@ -331,7 +329,7 @@ static int fscache_attach_object(struct 
fscache_cookie *cookie, /* pin the parent object */ spin_lock_nested(&cookie->parent->lock, 1); - hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, + hlist_for_each_entry(p, &cookie->parent->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) { @@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate); void __fscache_update_cookie(struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_p; fscache_stat(&fscache_n_updates); @@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) spin_lock(&cookie->lock); /* update the index entry on disk in each cache backing this cookie */ - hlist_for_each_entry(object, _p, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); } diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile index 3cc0df730156..09d278bb7b91 100644 --- a/fs/hfsplus/Makefile +++ b/fs/hfsplus/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ - bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o - + bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ + attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c new file mode 100644 index 000000000000..8d691f124714 --- /dev/null +++ b/fs/hfsplus/attributes.c @@ -0,0 +1,399 @@ +/* + * linux/fs/hfsplus/attributes.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handling of records in attributes tree + */ + +#include "hfsplus_fs.h" +#include "hfsplus_raw.h" + +static struct kmem_cache *hfsplus_attr_tree_cachep; + +int hfsplus_create_attr_tree_cache(void) +{ + if (hfsplus_attr_tree_cachep) + return -EEXIST; + + hfsplus_attr_tree_cachep = + kmem_cache_create("hfsplus_attr_cache", + sizeof(hfsplus_attr_entry), 
0, + SLAB_HWCACHE_ALIGN, NULL); + if (!hfsplus_attr_tree_cachep) + return -ENOMEM; + + return 0; +} + +void hfsplus_destroy_attr_tree_cache(void) +{ + kmem_cache_destroy(hfsplus_attr_tree_cachep); +} + +int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2) +{ + __be32 k1_cnid, k2_cnid; + + k1_cnid = k1->attr.cnid; + k2_cnid = k2->attr.cnid; + if (k1_cnid != k2_cnid) + return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1; + + return hfsplus_strcmp( + (const struct hfsplus_unistr *)&k1->attr.key_name, + (const struct hfsplus_unistr *)&k2->attr.key_name); +} + +int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, + u32 cnid, const char *name) +{ + int len; + + memset(key, 0, sizeof(struct hfsplus_attr_key)); + key->attr.cnid = cpu_to_be32(cnid); + if (name) { + len = strlen(name); + if (len > HFSPLUS_ATTR_MAX_STRLEN) { + printk(KERN_ERR "hfs: invalid xattr name's length\n"); + return -EINVAL; + } + hfsplus_asc2uni(sb, + (struct hfsplus_unistr *)&key->attr.key_name, + HFSPLUS_ATTR_MAX_STRLEN, name, len); + len = be16_to_cpu(key->attr.key_name.length); + } else { + key->attr.key_name.length = 0; + len = 0; + } + + /* The length of the key, as stored in key_len field, does not include + * the size of the key_len field itself. + * So, offsetof(hfsplus_attr_key, key_name) is a trick because + * it takes into consideration key_len field (__be16) of + * hfsplus_attr_key structure instead of length field (__be16) of + * hfsplus_attr_unistr structure. 
+ */ + key->key_len = + cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + + 2 * len); + + return 0; +} + +void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, + u32 cnid, + struct hfsplus_attr_unistr *name) +{ + int ustrlen; + + memset(key, 0, sizeof(struct hfsplus_attr_key)); + ustrlen = be16_to_cpu(name->length); + key->attr.cnid = cpu_to_be32(cnid); + key->attr.key_name.length = cpu_to_be16(ustrlen); + ustrlen *= 2; + memcpy(key->attr.key_name.unicode, name->unicode, ustrlen); + + /* The length of the key, as stored in key_len field, does not include + * the size of the key_len field itself. + * So, offsetof(hfsplus_attr_key, key_name) is a trick because + * it takes into consideration key_len field (__be16) of + * hfsplus_attr_key structure instead of length field (__be16) of + * hfsplus_attr_unistr structure. + */ + key->key_len = + cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + + ustrlen); +} + +hfsplus_attr_entry *hfsplus_alloc_attr_entry(void) +{ + return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL); +} + +void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry) +{ + if (entry) + kmem_cache_free(hfsplus_attr_tree_cachep, entry); +} + +#define HFSPLUS_INVALID_ATTR_RECORD -1 + +static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type, + u32 cnid, const void *value, size_t size) +{ + if (record_type == HFSPLUS_ATTR_FORK_DATA) { + /* + * Mac OS X supports only inline data attributes. + * Do nothing + */ + memset(entry, 0, sizeof(*entry)); + return sizeof(struct hfsplus_attr_fork_data); + } else if (record_type == HFSPLUS_ATTR_EXTENTS) { + /* + * Mac OS X supports only inline data attributes. + * Do nothing. 
+ */ + memset(entry, 0, sizeof(*entry)); + return sizeof(struct hfsplus_attr_extents); + } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) { + u16 len; + + memset(entry, 0, sizeof(struct hfsplus_attr_inline_data)); + entry->inline_data.record_type = cpu_to_be32(record_type); + if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE) + len = size; + else + return HFSPLUS_INVALID_ATTR_RECORD; + entry->inline_data.length = cpu_to_be16(len); + memcpy(entry->inline_data.raw_bytes, value, len); + /* + * Align len on two-byte boundary. + * It needs to add pad byte if we have odd len. + */ + len = round_up(len, 2); + return offsetof(struct hfsplus_attr_inline_data, raw_bytes) + + len; + } else /* invalid input */ + memset(entry, 0, sizeof(*entry)); + + return HFSPLUS_INVALID_ATTR_RECORD; +} + +int hfsplus_find_attr(struct super_block *sb, u32 cnid, + const char *name, struct hfs_find_data *fd) +{ + int err = 0; + + dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + if (name) { + err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name); + if (err) + goto failed_find_attr; + err = hfs_brec_find(fd, hfs_find_rec_by_key); + if (err) + goto failed_find_attr; + } else { + err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL); + if (err) + goto failed_find_attr; + err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid); + if (err) + goto failed_find_attr; + } + +failed_find_attr: + return err; +} + +int hfsplus_attr_exists(struct inode *inode, const char *name) +{ + int err = 0; + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + + if (!HFSPLUS_SB(sb)->attr_tree) + return 0; + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + return 0; + + err = hfsplus_find_attr(sb, inode->i_ino, name, &fd); + if (err) + goto attr_not_found; + + hfs_find_exit(&fd); + return 1; + +attr_not_found: + hfs_find_exit(&fd); + return 0; 
+} + +int hfsplus_create_attr(struct inode *inode, + const char *name, + const void *value, size_t size) +{ + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + hfsplus_attr_entry *entry_ptr; + int entry_size; + int err; + + dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n", + name ? name : NULL, inode->i_ino); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + entry_ptr = hfsplus_alloc_attr_entry(); + if (!entry_ptr) + return -ENOMEM; + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + goto failed_init_create_attr; + + if (name) { + err = hfsplus_attr_build_key(sb, fd.search_key, + inode->i_ino, name); + if (err) + goto failed_create_attr; + } else { + err = -EINVAL; + goto failed_create_attr; + } + + /* Mac OS X supports only inline data attributes. */ + entry_size = hfsplus_attr_build_record(entry_ptr, + HFSPLUS_ATTR_INLINE_DATA, + inode->i_ino, + value, size); + if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) { + err = -EINVAL; + goto failed_create_attr; + } + + err = hfs_brec_find(&fd, hfs_find_rec_by_key); + if (err != -ENOENT) { + if (!err) + err = -EEXIST; + goto failed_create_attr; + } + + err = hfs_brec_insert(&fd, entry_ptr, entry_size); + if (err) + goto failed_create_attr; + + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); + +failed_create_attr: + hfs_find_exit(&fd); + +failed_init_create_attr: + hfsplus_destroy_attr_entry(entry_ptr); + return err; +} + +static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, + struct hfs_find_data *fd) +{ + int err = 0; + __be32 found_cnid, record_type; + + hfs_bnode_read(fd->bnode, &found_cnid, + fd->keyoffset + + offsetof(struct hfsplus_attr_key, cnid), + sizeof(__be32)); + if (cnid != be32_to_cpu(found_cnid)) + return -ENOENT; + + hfs_bnode_read(fd->bnode, &record_type, + fd->entryoffset, sizeof(record_type)); + + switch (be32_to_cpu(record_type)) { + case HFSPLUS_ATTR_INLINE_DATA: + /* All is OK. 
Do nothing. */ + break; + case HFSPLUS_ATTR_FORK_DATA: + case HFSPLUS_ATTR_EXTENTS: + printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + return -EOPNOTSUPP; + default: + printk(KERN_ERR "hfs: invalid extended attribute record\n"); + return -ENOENT; + } + + err = hfs_brec_remove(fd); + if (err) + return err; + + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); + return err; +} + +int hfsplus_delete_attr(struct inode *inode, const char *name) +{ + int err = 0; + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + + dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n", + name ? name : NULL, inode->i_ino); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + return err; + + if (name) { + err = hfsplus_attr_build_key(sb, fd.search_key, + inode->i_ino, name); + if (err) + goto out; + } else { + printk(KERN_ERR "hfs: invalid extended attribute name\n"); + err = -EINVAL; + goto out; + } + + err = hfs_brec_find(&fd, hfs_find_rec_by_key); + if (err) + goto out; + + err = __hfsplus_delete_attr(inode, inode->i_ino, &fd); + if (err) + goto out; + +out: + hfs_find_exit(&fd); + return err; +} + +int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) +{ + int err = 0; + struct hfs_find_data fd; + + dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid); + + if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd); + if (err) + return err; + + for (;;) { + err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd); + if (err) { + if (err != -ENOENT) + printk(KERN_ERR "hfs: xattr search failed.\n"); + goto end_delete_all; + } + + err = __hfsplus_delete_attr(dir, cnid, &fd); + if (err) + goto end_delete_all; + } + +end_delete_all: + hfs_find_exit(&fd); + return err; +} diff --git a/fs/hfsplus/bfind.c 
b/fs/hfsplus/bfind.c index 5d799c13205f..d73c98d1ee99 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->key = ptr + tree->max_key_len + 2; dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - mutex_lock(&tree->tree_lock); + switch (tree->cnid) { + case HFSPLUS_CAT_CNID: + mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); + break; + case HFSPLUS_EXT_CNID: + mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); + break; + case HFSPLUS_ATTR_CNID: + mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); + break; + default: + BUG(); + } return 0; } @@ -38,15 +50,73 @@ void hfs_find_exit(struct hfs_find_data *fd) fd->tree = NULL; } -/* Find the record in bnode that best matches key (not greater than...)*/ -int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) +int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, + struct hfs_find_data *fd, + int *begin, + int *end, + int *cur_rec) +{ + __be32 cur_cnid, search_cnid; + + if (bnode->tree->cnid == HFSPLUS_EXT_CNID) { + cur_cnid = fd->key->ext.cnid; + search_cnid = fd->search_key->ext.cnid; + } else if (bnode->tree->cnid == HFSPLUS_CAT_CNID) { + cur_cnid = fd->key->cat.parent; + search_cnid = fd->search_key->cat.parent; + } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) { + cur_cnid = fd->key->attr.cnid; + search_cnid = fd->search_key->attr.cnid; + } else + BUG(); + + if (cur_cnid == search_cnid) { + (*end) = (*cur_rec); + if ((*begin) == (*end)) + return 1; + } else { + if (be32_to_cpu(cur_cnid) < be32_to_cpu(search_cnid)) + (*begin) = (*cur_rec) + 1; + else + (*end) = (*cur_rec) - 1; + } + + return 0; +} + +int hfs_find_rec_by_key(struct hfs_bnode *bnode, + struct hfs_find_data *fd, + int *begin, + int *end, + int *cur_rec) { int cmpval; + + cmpval = bnode->tree->keycmp(fd->key, fd->search_key); + if (!cmpval) { + (*end) = (*cur_rec); + return 1; + } + if 
(cmpval < 0) + (*begin) = (*cur_rec) + 1; + else + *(end) = (*cur_rec) - 1; + + return 0; +} + +/* Find the record in bnode that best matches key (not greater than...)*/ +int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd, + search_strategy_t rec_found) +{ u16 off, len, keylen; int rec; int b, e; int res; + if (!rec_found) + BUG(); + b = 0; e = bnode->num_recs - 1; res = -ENOENT; @@ -59,17 +129,12 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); - cmpval = bnode->tree->keycmp(fd->key, fd->search_key); - if (!cmpval) { - e = rec; + if (rec_found(bnode, fd, &b, &e, &rec)) { res = 0; goto done; } - if (cmpval < 0) - b = rec + 1; - else - e = rec - 1; } while (b <= e); + if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); @@ -79,19 +144,21 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) } hfs_bnode_read(bnode, fd->key, off, keylen); } + done: fd->record = e; fd->keyoffset = off; fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; + fail: return res; } /* Traverse a B*Tree from the root to a leaf finding best fit to key */ /* Return allocated copy of node found, set recnum to best record */ -int hfs_brec_find(struct hfs_find_data *fd) +int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare) { struct hfs_btree *tree; struct hfs_bnode *bnode; @@ -122,7 +189,7 @@ int hfs_brec_find(struct hfs_find_data *fd) goto invalid; bnode->parent = parent; - res = __hfs_brec_find(bnode, fd); + res = __hfs_brec_find(bnode, fd, do_key_compare); if (!height) break; if (fd->record < 0) @@ -149,7 +216,7 @@ int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len) { int res; - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (res) return res; if (fd->entrylength > rec_len) diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c 
index 1c42cc5b899f..f31ac6f404f1 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -62,7 +62,8 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) tree = node->tree; if (node->type == HFS_NODE_LEAF || - tree->attributes & HFS_TREE_VARIDXKEYS) + tree->attributes & HFS_TREE_VARIDXKEYS || + node->tree->cnid == HFSPLUS_ATTR_CNID) key_len = hfs_bnode_read_u16(node, off) + 2; else key_len = tree->max_key_len + 2; @@ -314,7 +315,8 @@ void hfs_bnode_dump(struct hfs_bnode *node) if (i && node->type == HFS_NODE_INDEX) { int tmp; - if (node->tree->attributes & HFS_TREE_VARIDXKEYS) + if (node->tree->attributes & HFS_TREE_VARIDXKEYS || + node->tree->cnid == HFSPLUS_ATTR_CNID) tmp = hfs_bnode_read_u16(node, key_off) + 2; else tmp = node->tree->max_key_len + 2; @@ -646,6 +648,8 @@ void hfs_bnode_put(struct hfs_bnode *node) if (test_bit(HFS_BNODE_DELETED, &node->flags)) { hfs_bnode_unhash(node); spin_unlock(&tree->hash_lock); + hfs_bnode_clear(node, 0, + PAGE_CACHE_SIZE * tree->pages_per_bnode); hfs_bmap_free(node); hfs_bnode_free(node); return; diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 2a734cfccc92..298d4e45604b 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -36,7 +36,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) return 0; if ((node->type == HFS_NODE_INDEX) && - !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { + !(node->tree->attributes & HFS_TREE_VARIDXKEYS) && + (node->tree->cnid != HFSPLUS_ATTR_CNID)) { retval = node->tree->max_key_len + 2; } else { recoff = hfs_bnode_read_u16(node, @@ -151,12 +152,13 @@ skip: /* get index key */ hfs_bnode_read_key(new_node, fd->search_key, 14); - __hfs_brec_find(fd->bnode, fd); + __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key); hfs_bnode_put(new_node); new_node = NULL; - if (tree->attributes & HFS_TREE_VARIDXKEYS) + if ((tree->attributes & HFS_TREE_VARIDXKEYS) || + (tree->cnid == HFSPLUS_ATTR_CNID)) key_len = be16_to_cpu(fd->search_key->key_len) + 2; else { 
fd->search_key->key_len = @@ -201,7 +203,7 @@ again: hfs_bnode_put(node); node = fd->bnode = parent; - __hfs_brec_find(node, fd); + __hfs_brec_find(node, fd, hfs_find_rec_by_key); goto again; } hfs_bnode_write_u16(node, @@ -367,12 +369,13 @@ again: parent = hfs_bnode_find(tree, node->parent); if (IS_ERR(parent)) return PTR_ERR(parent); - __hfs_brec_find(parent, fd); + __hfs_brec_find(parent, fd, hfs_find_rec_by_key); hfs_bnode_dump(parent); rec = fd->record; /* size difference between old and new key */ - if (tree->attributes & HFS_TREE_VARIDXKEYS) + if ((tree->attributes & HFS_TREE_VARIDXKEYS) || + (tree->cnid == HFSPLUS_ATTR_CNID)) newkeylen = hfs_bnode_read_u16(node, 14) + 2; else fd->keylength = newkeylen = tree->max_key_len + 2; @@ -427,7 +430,7 @@ skip: hfs_bnode_read_key(new_node, fd->search_key, 14); cnid = cpu_to_be32(new_node->this); - __hfs_brec_find(fd->bnode, fd); + __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key); hfs_brec_insert(fd, &cnid, sizeof(cnid)); hfs_bnode_put(fd->bnode); hfs_bnode_put(new_node); @@ -495,13 +498,15 @@ static int hfs_btree_inc_height(struct hfs_btree *tree) /* insert old root idx into new root */ node->parent = tree->root; if (node->type == HFS_NODE_LEAF || - tree->attributes & HFS_TREE_VARIDXKEYS) + tree->attributes & HFS_TREE_VARIDXKEYS || + tree->cnid == HFSPLUS_ATTR_CNID) key_size = hfs_bnode_read_u16(node, 14) + 2; else key_size = tree->max_key_len + 2; hfs_bnode_copy(new_node, 14, node, 14, key_size); - if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { + if (!(tree->attributes & HFS_TREE_VARIDXKEYS) && + (tree->cnid != HFSPLUS_ATTR_CNID)) { key_size = tree->max_key_len + 2; hfs_bnode_write_u16(new_node, 14, tree->max_key_len); } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 685d07d0ed18..efb689c21a95 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -98,6 +98,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); } break; + 
case HFSPLUS_ATTR_CNID: + if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) { + printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + tree->keycmp = hfsplus_attr_bin_cmp_key; + break; default: printk(KERN_ERR "hfs: unknown B*Tree requested\n"); goto fail_page; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 798d9c4c5e71..840d71edd193 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -45,7 +45,8 @@ void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, key->cat.parent = cpu_to_be32(parent); if (str) { - hfsplus_asc2uni(sb, &key->cat.name, str->name, str->len); + hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, + str->name, str->len); len = be16_to_cpu(key->cat.name.length); } else { key->cat.name.length = 0; @@ -167,7 +168,8 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, entry->type = cpu_to_be16(type); entry->thread.reserved = 0; entry->thread.parentID = cpu_to_be32(parentid); - hfsplus_asc2uni(sb, &entry->thread.nodeName, str->name, str->len); + hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, + str->name, str->len); return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; } @@ -198,7 +200,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), &tmp.thread.nodeName); - return hfs_brec_find(fd); + return hfs_brec_find(fd, hfs_find_rec_by_key); } int hfsplus_create_cat(u32 cnid, struct inode *dir, @@ -221,7 +223,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, S_ISDIR(inode->i_mode) ? 
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, dir->i_ino, str); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; @@ -233,7 +235,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); entry_size = hfsplus_cat_build_record(&entry, cnid, inode); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { /* panic? */ if (!err) @@ -253,7 +255,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, err1: hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - if (!hfs_brec_find(&fd)) + if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) hfs_brec_remove(&fd); err2: hfs_find_exit(&fd); @@ -279,7 +281,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) int len; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -296,7 +298,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) } else hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -326,7 +328,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -337,6 +339,12 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dir->i_size--; dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); + + if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) { + if (HFSPLUS_SB(sb)->attr_tree) + hfsplus_delete_all_attrs(dir, cnid); + } + out: hfs_find_exit(&fd); @@ -363,7 +371,7 @@ int hfsplus_rename_cat(u32 cnid, /* find the old dir entry and read the data */ hfsplus_cat_build_key(sb, 
src_fd.search_key, src_dir->i_ino, src_name); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { @@ -376,7 +384,7 @@ int hfsplus_rename_cat(u32 cnid, /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); - err = hfs_brec_find(&dst_fd); + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; @@ -391,7 +399,7 @@ int hfsplus_rename_cat(u32 cnid, /* finally remove the old entry */ hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; err = hfs_brec_remove(&src_fd); @@ -402,7 +410,7 @@ int hfsplus_rename_cat(u32 cnid, /* remove old thread entry */ hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset); @@ -414,7 +422,7 @@ int hfsplus_rename_cat(u32 cnid, hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); - err = hfs_brec_find(&dst_fd); + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 074e04589248..031c24e50521 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -15,6 +15,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" +#include "xattr.h" static inline void hfsplus_instantiate(struct dentry *dentry, struct inode *inode, u32 cnid) @@ -138,7 +139,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (err) return err; hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); - err = hfs_brec_find(&fd); + err = 
hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -421,6 +422,15 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, if (res) goto out_err; + res = hfsplus_init_inode_security(inode, dir, &dentry->d_name); + if (res == -EOPNOTSUPP) + res = 0; /* Operation is not supported. */ + else if (res) { + /* Try to delete anyway without error analysis. */ + hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); + goto out_err; + } + hfsplus_instantiate(dentry, inode, inode->i_ino); mark_inode_dirty(inode); goto out; @@ -450,15 +460,26 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, mode, rdev); res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); - if (res) { - clear_nlink(inode); - hfsplus_delete_inode(inode); - iput(inode); - goto out; + if (res) + goto failed_mknod; + + res = hfsplus_init_inode_security(inode, dir, &dentry->d_name); + if (res == -EOPNOTSUPP) + res = 0; /* Operation is not supported. */ + else if (res) { + /* Try to delete anyway without error analysis. 
*/ + hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); + goto failed_mknod; } hfsplus_instantiate(dentry, inode, inode->i_ino); mark_inode_dirty(inode); + goto out; + +failed_mknod: + clear_nlink(inode); + hfsplus_delete_inode(inode); + iput(inode); out: mutex_unlock(&sbi->vh_mutex); return res; @@ -499,15 +520,19 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, } const struct inode_operations hfsplus_dir_inode_operations = { - .lookup = hfsplus_lookup, - .create = hfsplus_create, - .link = hfsplus_link, - .unlink = hfsplus_unlink, - .mkdir = hfsplus_mkdir, - .rmdir = hfsplus_rmdir, - .symlink = hfsplus_symlink, - .mknod = hfsplus_mknod, - .rename = hfsplus_rename, + .lookup = hfsplus_lookup, + .create = hfsplus_create, + .link = hfsplus_link, + .unlink = hfsplus_unlink, + .mkdir = hfsplus_mkdir, + .rmdir = hfsplus_rmdir, + .symlink = hfsplus_symlink, + .mknod = hfsplus_mknod, + .rename = hfsplus_rename, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = hfsplus_listxattr, + .removexattr = hfsplus_removexattr, }; const struct file_operations hfsplus_dir_operations = { diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index eba76eab6d62..a94f0f779d5e 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -95,7 +95,7 @@ static void __hfsplus_ext_write_extent(struct inode *inode, HFSPLUS_IS_RSRC(inode) ? 
HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return; @@ -154,7 +154,7 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, hfsplus_ext_build_key(fd->search_key, cnid, block, type); fd->key->ext.cnid = 0; - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (res && res != -ENOENT) return res; if (fd->key->ext.cnid != fd->search_key->ext.cnid || diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a6da86b1b4c1..05b11f36024c 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -23,6 +23,7 @@ #define DBG_SUPER 0x00000010 #define DBG_EXTENT 0x00000020 #define DBG_BITMAP 0x00000040 +#define DBG_ATTR_MOD 0x00000080 #if 0 #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) @@ -46,6 +47,13 @@ typedef int (*btree_keycmp)(const hfsplus_btree_key *, #define NODE_HASH_SIZE 256 +/* B-tree mutex nested subclasses */ +enum hfsplus_btree_mutex_classes { + CATALOG_BTREE_MUTEX, + EXTENTS_BTREE_MUTEX, + ATTR_BTREE_MUTEX, +}; + /* An HFS+ BTree held in memory */ struct hfs_btree { struct super_block *sb; @@ -223,6 +231,7 @@ struct hfsplus_inode_info { #define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */ #define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */ #define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */ +#define HFSPLUS_I_ATTR_DIRTY 4 /* has changes in the attributes tree */ #define HFSPLUS_IS_RSRC(inode) \ test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags) @@ -302,7 +311,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) #define hfs_brec_remove hfsplus_brec_remove #define hfs_find_init hfsplus_find_init #define hfs_find_exit hfsplus_find_exit -#define __hfs_brec_find __hplusfs_brec_find +#define __hfs_brec_find __hfsplus_brec_find #define hfs_brec_find hfsplus_brec_find #define hfs_brec_read 
hfsplus_brec_read #define hfs_brec_goto hfsplus_brec_goto @@ -324,10 +333,33 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) */ #define HFSPLUS_IOC_BLESS _IO('h', 0x80) +typedef int (*search_strategy_t)(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); + /* * Functions in any *.c used in other files */ +/* attributes.c */ +int hfsplus_create_attr_tree_cache(void); +void hfsplus_destroy_attr_tree_cache(void); +hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); +void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); +int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *, + const hfsplus_btree_key *); +int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *, + u32, const char *); +void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, + u32 cnid, + struct hfsplus_attr_unistr *name); +int hfsplus_find_attr(struct super_block *, u32, + const char *, struct hfs_find_data *); +int hfsplus_attr_exists(struct inode *inode, const char *name); +int hfsplus_create_attr(struct inode *, const char *, const void *, size_t); +int hfsplus_delete_attr(struct inode *, const char *); +int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid); + /* bitmap.c */ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); int hfsplus_block_free(struct super_block *, u32, u32); @@ -369,8 +401,15 @@ int hfs_brec_remove(struct hfs_find_data *); /* bfind.c */ int hfs_find_init(struct hfs_btree *, struct hfs_find_data *); void hfs_find_exit(struct hfs_find_data *); -int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *); -int hfs_brec_find(struct hfs_find_data *); +int hfs_find_1st_rec_by_cnid(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); +int hfs_find_rec_by_key(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); +int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *, + search_strategy_t); +int hfs_brec_find(struct hfs_find_data *, search_strategy_t); int 
hfs_brec_read(struct hfs_find_data *, void *, int); int hfs_brec_goto(struct hfs_find_data *, int); @@ -417,11 +456,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, /* ioctl.c */ long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int hfsplus_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); -ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); /* options.c */ int hfsplus_parse_options(char *, struct hfsplus_sb_info *); @@ -446,7 +480,7 @@ int hfsplus_strcmp(const struct hfsplus_unistr *, int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, - struct hfsplus_unistr *, const char *, int); + struct hfsplus_unistr *, int, const char *, int); int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, struct qstr *str); int hfsplus_compare_dentry(const struct dentry *parent, diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 921967e5abb1..452ede01b036 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -52,13 +52,23 @@ typedef __be32 hfsplus_cnid; typedef __be16 hfsplus_unichr; +#define HFSPLUS_MAX_STRLEN 255 +#define HFSPLUS_ATTR_MAX_STRLEN 127 + /* A "string" as used in filenames, etc. 
*/ struct hfsplus_unistr { __be16 length; - hfsplus_unichr unicode[255]; + hfsplus_unichr unicode[HFSPLUS_MAX_STRLEN]; } __packed; -#define HFSPLUS_MAX_STRLEN 255 +/* + * A "string" is used in attributes file + * for name of extended attribute + */ +struct hfsplus_attr_unistr { + __be16 length; + hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN]; +} __packed; /* POSIX permissions */ struct hfsplus_perm { @@ -291,6 +301,8 @@ struct hfsplus_cat_file { /* File attribute bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 +#define HFSPLUS_XATTR_EXISTS 0x0004 +#define HFSPLUS_ACL_EXISTS 0x0008 /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { @@ -327,11 +339,63 @@ struct hfsplus_ext_key { #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) +#define HFSPLUS_XATTR_FINDER_INFO_NAME "com.apple.FinderInfo" +#define HFSPLUS_XATTR_ACL_NAME "com.apple.system.Security" + +#define HFSPLUS_ATTR_INLINE_DATA 0x10 +#define HFSPLUS_ATTR_FORK_DATA 0x20 +#define HFSPLUS_ATTR_EXTENTS 0x30 + +/* HFS+ attributes tree key */ +struct hfsplus_attr_key { + __be16 key_len; + __be16 pad; + hfsplus_cnid cnid; + __be32 start_block; + struct hfsplus_attr_unistr key_name; +} __packed; + +#define HFSPLUS_ATTR_KEYLEN sizeof(struct hfsplus_attr_key) + +/* HFS+ fork data attribute */ +struct hfsplus_attr_fork_data { + __be32 record_type; + __be32 reserved; + struct hfsplus_fork_raw the_fork; +} __packed; + +/* HFS+ extension attribute */ +struct hfsplus_attr_extents { + __be32 record_type; + __be32 reserved; + struct hfsplus_extent extents; +} __packed; + +#define HFSPLUS_MAX_INLINE_DATA_SIZE 3802 + +/* HFS+ attribute inline data */ +struct hfsplus_attr_inline_data { + __be32 record_type; + __be32 reserved1; + u8 reserved2[6]; + __be16 length; + u8 raw_bytes[HFSPLUS_MAX_INLINE_DATA_SIZE]; +} __packed; + +/* A data record in the attributes tree */ +typedef union { + __be32 record_type; + struct hfsplus_attr_fork_data fork_data; + struct 
hfsplus_attr_extents extents; + struct hfsplus_attr_inline_data inline_data; +} __packed hfsplus_attr_entry; + /* HFS+ generic BTree key */ typedef union { __be16 key_len; struct hfsplus_cat_key cat; struct hfsplus_ext_key ext; + struct hfsplus_attr_key attr; } __packed hfsplus_btree_key; #endif diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index dcd05be5344b..160ccc9cdb4b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -17,6 +17,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" +#include "xattr.h" static int hfsplus_readpage(struct file *file, struct page *page) { @@ -348,6 +349,18 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, error = error2; } + if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) { + if (sbi->attr_tree) { + error2 = + filemap_write_and_wait( + sbi->attr_tree->inode->i_mapping); + if (!error) + error = error2; + } else { + printk(KERN_ERR "hfs: sync non-existent attributes tree\n"); + } + } + if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) @@ -365,9 +378,10 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .setattr = hfsplus_setattr, - .setxattr = hfsplus_setxattr, - .getxattr = hfsplus_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, + .removexattr = hfsplus_removexattr, }; static const struct file_operations hfsplus_file_operations = { diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index e3c4c4209428..d3ff5cc317d7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -16,7 +16,6 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/sched.h> -#include <linux/xattr.h> #include <asm/uaccess.h> #include "hfsplus_fs.h" @@ -151,110 +150,3 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long 
arg) return -ENOTTY; } } - -int hfsplus_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - struct inode *inode = dentry->d_inode; - struct hfs_find_data fd; - hfsplus_cat_entry entry; - struct hfsplus_cat_file *file; - int res; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (res) - return res; - res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (res) - goto out; - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - file = &entry.file; - - if (!strcmp(name, "hfs.type")) { - if (size == 4) - memcpy(&file->user_info.fdType, value, 4); - else - res = -ERANGE; - } else if (!strcmp(name, "hfs.creator")) { - if (size == 4) - memcpy(&file->user_info.fdCreator, value, 4); - else - res = -ERANGE; - } else - res = -EOPNOTSUPP; - if (!res) { - hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); - } -out: - hfs_find_exit(&fd); - return res; -} - -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) -{ - struct inode *inode = dentry->d_inode; - struct hfs_find_data fd; - hfsplus_cat_entry entry; - struct hfsplus_cat_file *file; - ssize_t res = 0; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - if (size) { - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (res) - return res; - res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (res) - goto out; - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - } - file = &entry.file; - - if (!strcmp(name, "hfs.type")) { - if (size >= 4) { - memcpy(value, &file->user_info.fdType, 4); - res = 4; - } else - res = size ? 
-ERANGE : 4; - } else if (!strcmp(name, "hfs.creator")) { - if (size >= 4) { - memcpy(value, &file->user_info.fdCreator, 4); - res = 4; - } else - res = size ? -ERANGE : 4; - } else - res = -EOPNOTSUPP; -out: - if (size) - hfs_find_exit(&fd); - return res; -} - -#define HFSPLUS_ATTRLIST_SIZE (sizeof("hfs.creator")+sizeof("hfs.type")) - -ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - struct inode *inode = dentry->d_inode; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - if (!buffer || !size) - return HFSPLUS_ATTRLIST_SIZE; - if (size < HFSPLUS_ATTRLIST_SIZE) - return -ERANGE; - strcpy(buffer, "hfs.type"); - strcpy(buffer + sizeof("hfs.type"), "hfs.creator"); - - return HFSPLUS_ATTRLIST_SIZE; -} diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 796198d26553..974c26f96fae 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -20,6 +20,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb); static void hfsplus_destroy_inode(struct inode *inode); #include "hfsplus_fs.h" +#include "xattr.h" static int hfsplus_system_read_inode(struct inode *inode) { @@ -118,6 +119,7 @@ static int hfsplus_system_write_inode(struct inode *inode) case HFSPLUS_ATTR_CNID: fork = &vhdr->attr_file; tree = sbi->attr_tree; + break; default: return -EIO; } @@ -191,6 +193,12 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); if (!error) error = error2; + if (sbi->attr_tree) { + error2 = + filemap_write_and_wait(sbi->attr_tree->inode->i_mapping); + if (!error) + error = error2; + } error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) error = error2; @@ -281,6 +289,7 @@ static void hfsplus_put_super(struct super_block *sb) hfsplus_sync_fs(sb, 1); } + hfs_btree_close(sbi->attr_tree); hfs_btree_close(sbi->cat_tree); hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); @@ -477,12 +486,20 @@ static int 
hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_ERR "hfs: failed to load catalog file\n"); goto out_close_ext_tree; } + if (vhdr->attr_file.total_blocks != 0) { + sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); + if (!sbi->attr_tree) { + printk(KERN_ERR "hfs: failed to load attributes file\n"); + goto out_close_cat_tree; + } + } + sb->s_xattr = hfsplus_xattr_handlers; inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); err = PTR_ERR(inode); - goto out_close_cat_tree; + goto out_close_attr_tree; } sbi->alloc_file = inode; @@ -542,10 +559,27 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) } err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, sbi->hidden_dir); - mutex_unlock(&sbi->vh_mutex); - if (err) + if (err) { + mutex_unlock(&sbi->vh_mutex); + goto out_put_hidden_dir; + } + + err = hfsplus_init_inode_security(sbi->hidden_dir, + root, &str); + if (err == -EOPNOTSUPP) + err = 0; /* Operation is not supported. */ + else if (err) { + /* + * Try to delete anyway without + * error analysis. 
+ */ + hfsplus_delete_cat(sbi->hidden_dir->i_ino, + root, &str); + mutex_unlock(&sbi->vh_mutex); goto out_put_hidden_dir; + } + mutex_unlock(&sbi->vh_mutex); hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); } @@ -562,6 +596,8 @@ out_put_root: sb->s_root = NULL; out_put_alloc_file: iput(sbi->alloc_file); +out_close_attr_tree: + hfs_btree_close(sbi->attr_tree); out_close_cat_tree: hfs_btree_close(sbi->cat_tree); out_close_ext_tree: @@ -635,9 +671,20 @@ static int __init init_hfsplus_fs(void) hfsplus_init_once); if (!hfsplus_inode_cachep) return -ENOMEM; + err = hfsplus_create_attr_tree_cache(); + if (err) + goto destroy_inode_cache; err = register_filesystem(&hfsplus_fs_type); if (err) - kmem_cache_destroy(hfsplus_inode_cachep); + goto destroy_attr_tree_cache; + return 0; + +destroy_attr_tree_cache: + hfsplus_destroy_attr_tree_cache(); + +destroy_inode_cache: + kmem_cache_destroy(hfsplus_inode_cachep); + return err; } @@ -650,6 +697,7 @@ static void __exit exit_hfsplus_fs(void) * destroy cache. 
*/ rcu_barrier(); + hfsplus_destroy_attr_tree_cache(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index a32998f29f0b..2c2e47dcfdd8 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -295,7 +295,8 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size) return hfsplus_decompose_table + (off / 4); } -int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, +int hfsplus_asc2uni(struct super_block *sb, + struct hfsplus_unistr *ustr, int max_unistr_len, const char *astr, int len) { int size, dsize, decompose; @@ -303,7 +304,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, wchar_t c; decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); - while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { + while (outlen < max_unistr_len && len > 0) { size = asc2unichar(sb, astr, len, &c); if (decompose) @@ -311,7 +312,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, else dstr = NULL; if (dstr) { - if (outlen + dsize > HFSPLUS_MAX_STRLEN) + if (outlen + dsize > max_unistr_len) break; do { ustr->unicode[outlen++] = cpu_to_be16(*dstr++); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c new file mode 100644 index 000000000000..e8a4b0815c61 --- /dev/null +++ b/fs/hfsplus/xattr.c @@ -0,0 +1,709 @@ +/* + * linux/fs/hfsplus/xattr.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Logic of processing extended attributes + */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +const struct xattr_handler *hfsplus_xattr_handlers[] = { + &hfsplus_xattr_osx_handler, + &hfsplus_xattr_user_handler, + &hfsplus_xattr_trusted_handler, + &hfsplus_xattr_security_handler, + NULL +}; + +static int strcmp_xattr_finder_info(const char *name) +{ + if (name) { + return strncmp(name, HFSPLUS_XATTR_FINDER_INFO_NAME, + sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME)); + } + return -1; +} + +static int strcmp_xattr_acl(const char *name) +{ + if (name) { + 
return strncmp(name, HFSPLUS_XATTR_ACL_NAME, + sizeof(HFSPLUS_XATTR_ACL_NAME)); + } + return -1; +} + +static inline int is_known_namespace(const char *name) +{ + if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + return false; + + return true; +} + +static int can_set_xattr(struct inode *inode, const char *name, + const void *value, size_t value_len) +{ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return -EOPNOTSUPP; /* TODO: implement ACL support */ + + if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { + /* + * This makes sure that we aren't trying to set an + * attribute in a different namespace by prefixing it + * with "osx." + */ + if (is_known_namespace(name + XATTR_MAC_OSX_PREFIX_LEN)) + return -EOPNOTSUPP; + + return 0; + } + + /* + * Don't allow setting an attribute in an unknown namespace. 
+ */ + if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + return 0; +} + +int __hfsplus_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int err = 0; + struct hfs_find_data cat_fd; + hfsplus_cat_entry entry; + u16 cat_entry_flags, cat_entry_type; + u16 folder_finderinfo_len = sizeof(struct DInfo) + + sizeof(struct DXInfo); + u16 file_finderinfo_len = sizeof(struct FInfo) + + sizeof(struct FXInfo); + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + err = can_set_xattr(inode, name, value, size); + if (err) + return err; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) + name += XATTR_MAC_OSX_PREFIX_LEN; + + if (value == NULL) { + value = ""; + size = 0; + } + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: catalog searching failed\n"); + goto end_setxattr; + } + + if (!strcmp_xattr_finder_info(name)) { + if (flags & XATTR_CREATE) { + printk(KERN_ERR "hfs: xattr exists yet\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + hfs_bnode_read(cat_fd.bnode, &entry, cat_fd.entryoffset, + sizeof(hfsplus_cat_entry)); + if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) { + if (size == folder_finderinfo_len) { + memcpy(&entry.folder.user_info, value, + folder_finderinfo_len); + hfs_bnode_write(cat_fd.bnode, &entry, + cat_fd.entryoffset, + sizeof(struct hfsplus_cat_folder)); + hfsplus_mark_inode_dirty(inode, + HFSPLUS_I_CAT_DIRTY); + } else { + err = -ERANGE; + goto end_setxattr; + } + } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) { + if (size == 
file_finderinfo_len) { + memcpy(&entry.file.user_info, value, + file_finderinfo_len); + hfs_bnode_write(cat_fd.bnode, &entry, + cat_fd.entryoffset, + sizeof(struct hfsplus_cat_file)); + hfsplus_mark_inode_dirty(inode, + HFSPLUS_I_CAT_DIRTY); + } else { + err = -ERANGE; + goto end_setxattr; + } + } else { + err = -EOPNOTSUPP; + goto end_setxattr; + } + goto end_setxattr; + } + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) { + err = -EOPNOTSUPP; + goto end_setxattr; + } + + if (hfsplus_attr_exists(inode, name)) { + if (flags & XATTR_CREATE) { + printk(KERN_ERR "hfs: xattr exists yet\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + err = hfsplus_delete_attr(inode, name); + if (err) + goto end_setxattr; + err = hfsplus_create_attr(inode, name, value, size); + if (err) + goto end_setxattr; + } else { + if (flags & XATTR_REPLACE) { + printk(KERN_ERR "hfs: cannot replace xattr\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + err = hfsplus_create_attr(inode, name, value, size); + if (err) + goto end_setxattr; + } + + cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset); + if (cat_entry_type == HFSPLUS_FOLDER) { + cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode, + cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags)); + cat_entry_flags |= HFSPLUS_XATTR_EXISTS; + if (!strcmp_xattr_acl(name)) + cat_entry_flags |= HFSPLUS_ACL_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags), + cat_entry_flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else if (cat_entry_type == HFSPLUS_FILE) { + cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode, + cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags)); + cat_entry_flags |= HFSPLUS_XATTR_EXISTS; + if (!strcmp_xattr_acl(name)) + cat_entry_flags |= HFSPLUS_ACL_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags), + cat_entry_flags); + hfsplus_mark_inode_dirty(inode, 
HFSPLUS_I_CAT_DIRTY); + } else { + printk(KERN_ERR "hfs: invalid catalog entry type\n"); + err = -EIO; + goto end_setxattr; + } + +end_setxattr: + hfs_find_exit(&cat_fd); + return err; +} + +static inline int is_osx_xattr(const char *xattr_name) +{ + return !is_known_namespace(xattr_name); +} + +static int name_len(const char *xattr_name, int xattr_name_len) +{ + int len = xattr_name_len + 1; + + if (is_osx_xattr(xattr_name)) + len += XATTR_MAC_OSX_PREFIX_LEN; + + return len; +} + +static int copy_name(char *buffer, const char *xattr_name, int name_len) +{ + int len = name_len; + int offset = 0; + + if (is_osx_xattr(xattr_name)) { + strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN); + offset += XATTR_MAC_OSX_PREFIX_LEN; + len += XATTR_MAC_OSX_PREFIX_LEN; + } + + strncpy(buffer + offset, xattr_name, name_len); + memset(buffer + offset + name_len, 0, 1); + len += 1; + + return len; +} + +static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, + void *value, size_t size) +{ + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 entry_type; + u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); + u16 file_rec_len = sizeof(struct FInfo) + sizeof(struct FXInfo); + u16 record_len = max(folder_rec_len, file_rec_len); + u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)]; + u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)]; + + if (size >= record_len) { + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return res; + } + res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (res) + goto end_getxattr_finder_info; + entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset); + + if (entry_type == HFSPLUS_FOLDER) { + hfs_bnode_read(fd.bnode, folder_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_folder, user_info), + folder_rec_len); + memcpy(value, 
folder_finder_info, folder_rec_len); + res = folder_rec_len; + } else if (entry_type == HFSPLUS_FILE) { + hfs_bnode_read(fd.bnode, file_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_file, user_info), + file_rec_len); + memcpy(value, file_finder_info, file_rec_len); + res = file_rec_len; + } else { + res = -EOPNOTSUPP; + goto end_getxattr_finder_info; + } + } else + res = size ? -ERANGE : record_len; + +end_getxattr_finder_info: + if (size >= record_len) + hfs_find_exit(&fd); + return res; +} + +ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + hfsplus_attr_entry *entry; + __be32 xattr_record_type; + u32 record_type; + u16 record_length = 0; + ssize_t res = 0; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) { + /* skip "osx." prefix */ + name += XATTR_MAC_OSX_PREFIX_LEN; + /* + * Don't allow retrieving properly prefixed attributes + * by prepending them with "osx." 
+ */ + if (is_known_namespace(name)) + return -EOPNOTSUPP; + } + + if (!strcmp_xattr_finder_info(name)) + return hfsplus_getxattr_finder_info(dentry, value, size); + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return -EOPNOTSUPP; + + entry = hfsplus_alloc_attr_entry(); + if (!entry) { + printk(KERN_ERR "hfs: can't allocate xattr entry\n"); + return -ENOMEM; + } + + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + goto failed_getxattr_init; + } + + res = hfsplus_find_attr(inode->i_sb, inode->i_ino, name, &fd); + if (res) { + if (res == -ENOENT) + res = -ENODATA; + else + printk(KERN_ERR "hfs: xattr searching failed\n"); + goto out; + } + + hfs_bnode_read(fd.bnode, &xattr_record_type, + fd.entryoffset, sizeof(xattr_record_type)); + record_type = be32_to_cpu(xattr_record_type); + if (record_type == HFSPLUS_ATTR_INLINE_DATA) { + record_length = hfs_bnode_read_u16(fd.bnode, + fd.entryoffset + + offsetof(struct hfsplus_attr_inline_data, + length)); + if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) { + printk(KERN_ERR "hfs: invalid xattr record size\n"); + res = -EIO; + goto out; + } + } else if (record_type == HFSPLUS_ATTR_FORK_DATA || + record_type == HFSPLUS_ATTR_EXTENTS) { + printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + res = -EOPNOTSUPP; + goto out; + } else { + printk(KERN_ERR "hfs: invalid xattr record\n"); + res = -EIO; + goto out; + } + + if (size) { + hfs_bnode_read(fd.bnode, entry, fd.entryoffset, + offsetof(struct hfsplus_attr_inline_data, + raw_bytes) + record_length); + } + + if (size >= record_length) { + memcpy(value, entry->inline_data.raw_bytes, record_length); + res = record_length; + } else + res = size ? 
-ERANGE : record_length; + +out: + hfs_find_exit(&fd); + +failed_getxattr_init: + hfsplus_destroy_attr_entry(entry); + return res; +} + +static inline int can_list(const char *xattr_name) +{ + if (!xattr_name) + return 0; + + return strncmp(xattr_name, XATTR_TRUSTED_PREFIX, + XATTR_TRUSTED_PREFIX_LEN) || + capable(CAP_SYS_ADMIN); +} + +static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, + char *buffer, size_t size) +{ + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 entry_type; + u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)]; + u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)]; + unsigned long len, found_bit; + int xattr_name_len, symbols_count; + + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return res; + } + + res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (res) + goto end_listxattr_finder_info; + + entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset); + if (entry_type == HFSPLUS_FOLDER) { + len = sizeof(struct DInfo) + sizeof(struct DXInfo); + hfs_bnode_read(fd.bnode, folder_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_folder, user_info), + len); + found_bit = find_first_bit((void *)folder_finder_info, len*8); + } else if (entry_type == HFSPLUS_FILE) { + len = sizeof(struct FInfo) + sizeof(struct FXInfo); + hfs_bnode_read(fd.bnode, file_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_file, user_info), + len); + found_bit = find_first_bit((void *)file_finder_info, len*8); + } else { + res = -EOPNOTSUPP; + goto end_listxattr_finder_info; + } + + if (found_bit >= (len*8)) + res = 0; + else { + symbols_count = sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME) - 1; + xattr_name_len = + name_len(HFSPLUS_XATTR_FINDER_INFO_NAME, symbols_count); + if (!buffer || !size) { + if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) + res = xattr_name_len; + } 
else if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) { + if (size < xattr_name_len) + res = -ERANGE; + else { + res = copy_name(buffer, + HFSPLUS_XATTR_FINDER_INFO_NAME, + symbols_count); + } + } + } + +end_listxattr_finder_info: + hfs_find_exit(&fd); + + return res; +} + +ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + ssize_t err; + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 key_len = 0; + struct hfsplus_attr_key attr_key; + char strbuf[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + int xattr_name_len; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + res = hfsplus_listxattr_finder_info(dentry, buffer, size); + if (res < 0) + return res; + else if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return (res == 0) ? -EOPNOTSUPP : res; + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd); + if (err) { + if (err == -ENOENT) { + if (res == 0) + res = -ENODATA; + goto end_listxattr; + } else { + res = err; + goto end_listxattr; + } + } + + for (;;) { + key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); + if (key_len == 0 || key_len > fd.tree->max_key_len) { + printk(KERN_ERR "hfs: invalid xattr key length: %d\n", + key_len); + res = -EIO; + goto end_listxattr; + } + + hfs_bnode_read(fd.bnode, &attr_key, + fd.keyoffset, key_len + sizeof(key_len)); + + if (be32_to_cpu(attr_key.cnid) != inode->i_ino) + goto end_listxattr; + + xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN; + if (hfsplus_uni2asc(inode->i_sb, + (const struct hfsplus_unistr *)&fd.key->attr.key_name, + strbuf, &xattr_name_len)) { + printk(KERN_ERR "hfs: unicode conversion failed\n"); + res = -EIO; + goto end_listxattr; + } + + if (!buffer || !size) { + if (can_list(strbuf)) + res += 
name_len(strbuf, xattr_name_len); + } else if (can_list(strbuf)) { + if (size < (res + name_len(strbuf, xattr_name_len))) { + res = -ERANGE; + goto end_listxattr; + } else + res += copy_name(buffer + res, + strbuf, xattr_name_len); + } + + if (hfs_brec_goto(&fd, 1)) + goto end_listxattr; + } + +end_listxattr: + hfs_find_exit(&fd); + return res; +} + +int hfsplus_removexattr(struct dentry *dentry, const char *name) +{ + int err = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data cat_fd; + u16 flags; + u16 cat_entry_type; + int is_xattr_acl_deleted = 0; + int is_all_xattrs_deleted = 0; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return -EOPNOTSUPP; + + err = can_set_xattr(inode, name, NULL, 0); + if (err) + return err; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) + name += XATTR_MAC_OSX_PREFIX_LEN; + + if (!strcmp_xattr_finder_info(name)) + return -EOPNOTSUPP; + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: catalog searching failed\n"); + goto end_removexattr; + } + + err = hfsplus_delete_attr(inode, name); + if (err) + goto end_removexattr; + + is_xattr_acl_deleted = !strcmp_xattr_acl(name); + is_all_xattrs_deleted = !hfsplus_attr_exists(inode, NULL); + + if (!is_xattr_acl_deleted && !is_all_xattrs_deleted) + goto end_removexattr; + + cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset); + + if (cat_entry_type == HFSPLUS_FOLDER) { + flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags)); + if (is_xattr_acl_deleted) + flags &= ~HFSPLUS_ACL_EXISTS; + if (is_all_xattrs_deleted) + flags &= ~HFSPLUS_XATTR_EXISTS; + 
hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags), + flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else if (cat_entry_type == HFSPLUS_FILE) { + flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags)); + if (is_xattr_acl_deleted) + flags &= ~HFSPLUS_ACL_EXISTS; + if (is_all_xattrs_deleted) + flags &= ~HFSPLUS_XATTR_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags), + flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else { + printk(KERN_ERR "hfs: invalid catalog entry type\n"); + err = -EIO; + goto end_removexattr; + } + +end_removexattr: + hfs_find_exit(&cat_fd); + return err; +} + +static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. 
+ * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_osx_handler = { + .prefix = XATTR_MAC_OSX_PREFIX, + .list = hfsplus_osx_listxattr, + .get = hfsplus_osx_getxattr, + .set = hfsplus_osx_setxattr, +}; diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h new file mode 100644 index 000000000000..847b695b984d --- /dev/null +++ b/fs/hfsplus/xattr.h @@ -0,0 +1,60 @@ +/* + * linux/fs/hfsplus/xattr.h + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Logic of processing extended attributes + */ + +#ifndef _LINUX_HFSPLUS_XATTR_H +#define _LINUX_HFSPLUS_XATTR_H + +#include <linux/xattr.h> + +extern const struct xattr_handler hfsplus_xattr_osx_handler; +extern const struct xattr_handler hfsplus_xattr_user_handler; +extern const struct xattr_handler hfsplus_xattr_trusted_handler; +/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ +/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ +extern const struct xattr_handler hfsplus_xattr_security_handler; + +extern const struct xattr_handler *hfsplus_xattr_handlers[]; + +int __hfsplus_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags); + +static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); +} + +ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size); + +ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); + +int hfsplus_removexattr(struct dentry *dentry, const char *name); + +int hfsplus_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); + +static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) +{ + /*TODO: implement*/ + return 0; +} + +static inline int hfsplus_init_inode_security(struct inode *inode, + 
struct inode *dir, + const struct qstr *qstr) +{ + int err; + + err = hfsplus_init_acl(inode, dir); + if (!err) + err = hfsplus_init_security(inode, dir, qstr); + return err; +} + +#endif diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c new file mode 100644 index 000000000000..83b842f113c5 --- /dev/null +++ b/fs/hfsplus/xattr_security.c @@ -0,0 +1,104 @@ +/* + * linux/fs/hfsplus/xattr_security.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for storing security labels as extended attributes. + */ + +#include <linux/security.h> +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). 
+ */ + return -EOPNOTSUPP; +} + +static int hfsplus_initxattrs(struct inode *inode, + const struct xattr *xattr_array, + void *fs_info) +{ + const struct xattr *xattr; + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t xattr_name_len; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + xattr_name_len = strlen(xattr->name); + + if (xattr_name_len == 0) + continue; + + if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN > + HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + + XATTR_SECURITY_PREFIX_LEN, xattr->name); + memset(xattr_name + + XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1); + + err = __hfsplus_setxattr(inode, xattr_name, + xattr->value, xattr->value_len, 0); + if (err) + break; + } + return err; +} + +int hfsplus_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &hfsplus_initxattrs, NULL); +} + +const struct xattr_handler hfsplus_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = hfsplus_security_listxattr, + .get = hfsplus_security_getxattr, + .set = hfsplus_security_setxattr, +}; diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c new file mode 100644 index 000000000000..426cee277542 --- /dev/null +++ b/fs/hfsplus/xattr_trusted.c @@ -0,0 +1,63 @@ +/* + * linux/fs/hfsplus/xattr_trusted.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for trusted extended attributes. 
+ */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_TRUSTED_PREFIX); + strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_TRUSTED_PREFIX); + strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = hfsplus_trusted_listxattr, + .get = hfsplus_trusted_getxattr, + .set = hfsplus_trusted_setxattr, +}; diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c new file mode 100644 index 000000000000..e34016561ae0 --- /dev/null +++ b/fs/hfsplus/xattr_user.c @@ -0,0 +1,63 @@ +/* + * linux/fs/hfsplus/xattr_user.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for user extended attributes. 
+ */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_USER_PREFIX); + strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_USER_PREFIX); + strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). 
+ */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = hfsplus_user_listxattr, + .get = hfsplus_user_getxattr, + .set = hfsplus_user_setxattr, +}; diff --git a/fs/inode.c b/fs/inode.c index 67880e604399..f5f7c06c36fb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb, int (*test)(struct inode *, void *), void *data) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_sb != sb) { spin_unlock(&inode->i_lock); @@ -830,11 +829,10 @@ repeat: static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_ino != ino) { spin_unlock(&inode->i_lock); @@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked); static int test_inode_iunique(struct super_block *sb, unsigned long ino) { struct hlist_head *b = inode_hashtable + hash(sb, ino); - struct hlist_node *node; struct inode *inode; spin_lock(&inode_hash_lock); - hlist_for_each_entry(inode, node, b, i_hash) { + hlist_for_each_entry(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) { spin_unlock(&inode_hash_lock); return 0; @@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode) struct hlist_head *head = inode_hashtable + hash(sb, ino); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) @@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode) } break; } - if (likely(!node)) { + 
if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); @@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct hlist_head *head = inode_hashtable + hash(sb, hashval); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) @@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } break; } - if (likely(!node)) { + if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index a2717408c478..0796c45d0d4d 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -11,7 +11,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/nfs_fs.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/kthread.h> @@ -220,10 +220,19 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; + struct nlm_rqst *req; struct file_lock *fl, *next; u32 nsmstate; struct net *net = host->net; + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + printk(KERN_ERR "lockd: reclaimer unable to alloc memory." 
+ " Locks for %s won't be reclaimed!\n", + host->h_name); + return 0; + } + allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -253,7 +262,7 @@ restart: */ if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) != 0) + if (nlmclnt_reclaim(host, fl, req) != 0) continue; list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { @@ -279,5 +288,6 @@ restart: /* Release host handle after use */ nlmclnt_release_host(host); lockd_down(net); + kfree(req); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 366277190b82..7e529c3c45c0 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -618,17 +618,15 @@ out_unlock: * RECLAIM: Try to reclaim a lock */ int -nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl, + struct nlm_rqst *req) { - struct nlm_rqst reqst, *req; int status; - req = &reqst; memset(req, 0, sizeof(*req)); locks_init_lock(&req->a_args.lock.fl); locks_init_lock(&req->a_res.lock.fl); req->a_host = host; - req->a_flags = 0; /* Set up the argument struct */ nlmclnt_setlockargs(req, fl); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0e17090c310f..969d589c848d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -13,6 +13,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/mutex.h> @@ -32,15 +33,15 @@ static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH]; static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; -#define for_each_host(host, pos, chain, table) \ +#define for_each_host(host, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry((host), (pos), (chain), h_hash) + hlist_for_each_entry((host), (chain), h_hash) -#define for_each_host_safe(host, pos, next, chain, table) \ +#define for_each_host_safe(host, 
next, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry_safe((host), (pos), (next), \ + hlist_for_each_entry_safe((host), (next), \ (chain), h_hash) static unsigned long nrhosts; @@ -225,7 +226,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .net = net, }; struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; struct nsm_handle *nsm = NULL; struct lockd_net *ln = net_generic(net, lockd_net_id); @@ -237,7 +237,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, mutex_lock(&nlm_host_mutex); chain = &nlm_client_hosts[nlm_hash_address(sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) @@ -322,7 +322,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const size_t hostname_len) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host = NULL; struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); @@ -350,7 +349,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, nlm_gc_hosts(net); chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) @@ -515,10 +514,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, { struct nlm_host *host; struct hlist_head *chain; - struct hlist_node *pos; mutex_lock(&nlm_host_mutex); - for_each_host(host, pos, chain, cache) { + for_each_host(host, chain, cache) { if (host->h_nsmhandle == nsm && host->h_nsmstate != info->state) { host->h_nsmstate = info->state; @@ -570,7 +568,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info) static void nlm_complain_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct 
nlm_host *host; if (net) { @@ -587,7 +584,7 @@ static void nlm_complain_hosts(struct net *net) dprintk("lockd: %lu hosts left:\n", nrhosts); } - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; dprintk(" %s (cnt %d use %d exp %ld net %p)\n", @@ -600,14 +597,13 @@ void nlm_shutdown_hosts_net(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts in net %p...\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_expires = jiffies - 1; @@ -644,11 +640,11 @@ static void nlm_gc_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_host *host; dprintk("lockd: host garbage collection for net %p\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_inuse = 0; @@ -657,7 +653,7 @@ nlm_gc_hosts(struct net *net) /* Mark all hosts that hold locks, blocks or shares */ nlmsvc_mark_resources(net); - for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { + for_each_host_safe(host, next, chain, nlm_server_hosts) { if (net && host->net != net) continue; if (atomic_read(&host->h_count) || host->h_inuse diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3c2cfc683631..1812f026960c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b3a24b07d981..97e87415b145 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ 
-13,7 +13,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/sunrpc/svc.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/nfsd/nfsfh.h> #include <linux/nfsd/export.h> #include <linux/lockd/lockd.h> @@ -84,7 +84,6 @@ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, struct nfs_fh *f) { - struct hlist_node *pos; struct nlm_file *file; unsigned int hash; __be32 nfserr; @@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, /* Lock file table */ mutex_lock(&nlm_file_mutex); - hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) + hlist_for_each_entry(file, &nlm_files[hash], f_list) if (!nfs_compare_fh(&file->f_handle, f)) goto found; @@ -248,13 +247,13 @@ static int nlm_traverse_files(void *data, nlm_host_match_fn_t match, int (*is_failover_file)(void *data, struct nlm_file *file)) { - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_file *file; int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { - hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { + hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) { if (is_failover_file && !is_failover_file(data, file)) continue; file->f_count++; diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 862a2f16db64..5f7b053720ee 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) struct super_block *pipefs_sb; int ret = 0; + sunrpc_init_cache_detail(cd); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { ret = nfs_cache_register_sb(pipefs_sb, cd); rpc_put_sb_net(net); + if (ret) + sunrpc_destroy_cache_detail(cd); } return ret; } @@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - 
sunrpc_init_cache_detail(cd); -} - -void nfs_cache_destroy(struct cache_detail *cd) -{ sunrpc_destroy_cache_detail(cd); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 317db95e37f8..4116d2c3f52f 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); extern int nfs_cache_register_sb(struct super_block *sb, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9f3c66438d0e..84d8eae203a7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -197,7 +197,6 @@ error_0: EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) -/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index ca4b11ec87a2..945527092295 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #include <linux/seq_file.h> #include <linux/inet.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -142,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); 
+ ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } @@ -351,60 +353,47 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, } EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); +static struct cache_detail nfs_dns_resolve_template = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + + int nfs_dns_resolver_cache_init(struct net *net) { - int err = -ENOMEM; + int err; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); + nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net); + if (IS_ERR(nn->nfs_dns_resolve)) + return PTR_ERR(nn->nfs_dns_resolve); + + err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; - nn->nfs_dns_resolve = cd; return 0; err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: + cache_destroy_net(nn->nfs_dns_resolve, net); return err; } void nfs_dns_resolver_cache_destroy(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct 
cache_detail *cd = nn->nfs_dns_resolve; - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); + nfs_cache_unregister_net(net, nn->nfs_dns_resolve); + cache_destroy_net(nn->nfs_dns_resolve, net); } static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2e9779b58b7a..ac4fc9a8fdbc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -6,6 +6,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/nfs_mount.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> @@ -29,15 +30,14 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + if (ret >= 0) + clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; + idr_preload_end(); + return ret < 0 ? 
ret : 0; } #ifdef CONFIG_NFS_V4_1 diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b720064bcd7f..1fe284f01f8b 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -31,6 +31,7 @@ #include <linux/nfs_fs.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/sunrpc/addr.h> #include "internal.h" #include "nfs4session.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 1e09eb78543b..0dd766079e1c 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/vfs.h> #include <linux/inet.h> #include "internal.h" diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index d35b62e83ea6..6da209bd9408 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->ld == ld && d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { if (atomic_read(&d->ref)) @@ -248,12 +247,11 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; HLIST_HEAD(tmp); spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); @@ -291,12 +289,11 @@ void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) { struct nfs4_deviceid_node *d; - struct hlist_node *n; int i; rcu_read_lock(); for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ - hlist_for_each_entry_rcu(d, n, 
&nfs4_deviceid_cache[i], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node) if (d->nfs_client == clp) set_bit(NFS_DEVICEID_INVALID, &d->flags); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a9dc5fc29955..17b32b722457 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -31,6 +31,7 @@ #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d34c459..87fd1410b737 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,11 +24,13 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -46,8 +52,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* @@ -67,6 +72,12 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5681c5906f08..5f38ea36e266 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -67,11 +67,6 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int 
expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); @@ -245,7 +240,7 @@ static struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -315,6 +310,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -337,11 +333,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -674,6 +665,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; new->cd = item->cd; } @@ -715,7 +707,7 @@ static struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 497584c70366..d620e7f81429 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -9,7 +9,7 @@ #include 
<linux/debugfs.h> #include <linux/module.h> #include <linux/nsproxy.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <asm/uaccess.h> #include "state.h" diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 0ce12346df9c..4832fd819f88 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -140,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -192,7 +186,7 @@ static struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -321,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -365,7 +353,7 @@ static struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d1c5dba2bbb..ae73175e6e68 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -993,14 +993,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; + p = buf; status = 
nfsd4_encode_fattr(&cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, &p, + count, verify->ve_bmval, rqstp, 0); /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4914af4a817e..899ca26dd194 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1185,6 +1185,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen) static int nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) { + /* XXX: The usermode helper is not working in containers yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } return nfsd4_umh_cltrack_upcall("init", NULL, NULL); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e7103b6e0ad..16d39c6c4fbb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -40,7 +40,7 @@ #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" @@ -261,33 +261,46 @@ static inline int get_new_stid(struct nfs4_stid *stid) return new_stid; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - stateid_t *s = &stid->sc_stateid; + struct idr *stateids = &cl->cl_stateids; + static int min_stateid = 0; + struct nfs4_stid *stid; int new_id; - stid->sc_type = type; + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + goto out_free; + if (idr_get_new_above(stateids, stid, min_stateid, 
&new_id)) + goto out_free; stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - s->si_generation = 0; -} - -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) -{ - struct idr *stateids = &cl->cl_stateids; + stid->sc_stateid.si_generation = 0; - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): */ - return kmem_cache_alloc(slab, GFP_KERNEL); + + min_stateid = new_id+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return stid; +out_free: + kfree(stid); + return NULL; } static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) @@ -316,7 +329,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. 
The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -337,13 +350,21 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } +static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + free_stid(&dp->dl_stid, deleg_slab); num_delegations--; } } @@ -360,9 +381,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; - - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + s->sc_type = 0; } /* Called under the state lock. */ @@ -519,7 +538,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + free_stid(&stp->st_stid, stateid_slab); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -905,7 +924,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, new = __alloc_session(slotsize, numslots); if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + nfsd4_put_drc_mem(slotsize, numslots); return NULL; } init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); @@ -1048,7 +1067,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { @@ -1060,6 +1079,7 @@ free_client(struct nfs4_client *clp) } 
free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } @@ -1258,7 +1278,12 @@ static void gen_confirm(struct nfs4_client *clp) static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -1844,11 +1869,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); -out: nfs4_unlock_state(); +out: dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: + nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); @@ -2443,9 +2469,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4031,7 +4056,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; @@ -4913,16 +4938,6 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int 
ret; - /* - * FIXME: For now, we hang most of the pernet global stuff off of - * init_net until nfsd is fully containerized. Eventually, we'll - * need to pass a net pointer into this function, take a reference - * to that instead and then do most of the rest of this on a per-net - * basis. - */ - if (net != &init_net) - return -EINVAL; - ret = nfs4_state_create_net(net); if (ret) return ret; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8ca6d17f6cf3..01168865dd37 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2024,12 +2024,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. + * countp is the buffer size in _words_ */ __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, + struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2038,12 +2037,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh tempfh; struct kstatfs statfs; - int buflen = *countp << 2; + int buflen = count << 2; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; + __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2447,7 +2446,7 @@ out_acl: } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + *buffer = p; status = nfs_ok; out: @@ -2459,7 +2458,6 @@ out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; out_serverfault: @@ -2478,7 +2476,7 @@ static inline int attributes_need_mount(u32 *bmval) static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, 
__be32 *p, int *buflen) + const char *name, int namlen, __be32 **p, int buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2584,10 +2582,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; @@ -2714,10 +2711,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + &resp->p, buflen, getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34a55da..62c1ee128aeb 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,22 +9,22 @@ */ #include <linux/slab.h> +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <net/checksum.h> #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 - */ -#define CACHESIZE 1024 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + #define HASHSIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; +static unsigned int num_drc_entries; +static unsigned int max_drc_entries; /* * Calculate the hash index from an XID. 
@@ -37,6 +37,14 @@ static inline u32 request_hash(u32 xid) } static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static int nfsd_reply_cache_shrink(struct shrinker *shrink, + struct shrink_control *sc); + +struct shrinker nfsd_reply_cache_shrinker = { + .shrink = nfsd_reply_cache_shrink, + .seeks = 1, +}; /* * locking for the reply cache: @@ -44,30 +52,86 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. 
+ */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} + +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + kmem_cache_free(drc_slab, rp); +} + +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; - cache_disabled = 0; + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -79,27 +143,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == 
RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -112,83 +182,214 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } +static inline bool +nfsd_cache_entry_expired(struct svc_cacherep *rp) +{ + return rp->c_state != RC_INPROG && + time_after(jiffies, rp->c_timestamp + RC_EXPIRE); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static void +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + if (!nfsd_cache_entry_expired(rp) && + num_drc_entries <= max_drc_entries) + break; + nfsd_reply_cache_free_locked(rp); + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. 
+ */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static int +nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned int num; + + spin_lock(&cache_lock); + if (sc->nr_to_scan) + prune_cache_entries(); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +/* + * Walk an xdr_buf and get a checksum for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. 
+ */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp; + struct hlist_head *rh; + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + + rh = &cache_hash[request_hash(xid)]; + hlist_for_each_entry(rp, rh, c_hash) { + if (xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && + rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && + rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) + return rp; + } + return NULL; +} + /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. 
*/ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; int rtn; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; return RC_DOIT; } + csum = nfsd_cache_csum(rqstp); + spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; + rp = nfsd_cache_search(rqstp, csum); + if (rp) + goto found_entry; + + /* Try to use the first entry on the LRU */ + if (!list_empty(&lru_head)) { + rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); + if (nfsd_cache_entry_expired(rp) || + num_drc_entries >= max_drc_entries) { + lru_put_end(rp); + prune_cache_entries(); + goto setup_entry; } } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } + /* Drop the lock and allocate a new entry */ + spin_unlock(&cache_lock); + rp = nfsd_reply_cache_alloc(); + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + return RC_DOIT; } + spin_lock(&cache_lock); + ++num_drc_entries; + + /* + * Must search again just in case someone inserted one + * after we dropped the lock above. 
+ */ + found = nfsd_cache_search(rqstp, csum); + if (found) { + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; } - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; - - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } - goto out; - } + /* + * We're keeping the one we just allocated. Are we now over the + * limit? Prune one off the tip of the LRU in trade for the one we + * just allocated if so. + */ + if (num_drc_entries >= max_drc_entries) + nfsd_reply_cache_free_locked(list_first_entry(&lru_head, + struct svc_cacherep, c_lru)); +setup_entry: + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -201,9 +402,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. 
*/ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -232,7 +433,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -257,11 +458,11 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); @@ -269,7 +470,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -283,21 +484,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) cachv = &rp->c_replvec; cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } cachv->iov_len = len << 2; memcpy(cachv->iov_base, statp, len << 2); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2db7021b01ae..13a21c8fca49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,7 +10,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -125,11 +125,11 @@ static const struct 
file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) @@ -140,8 +140,26 @@ static int exports_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations exports_operations = { - .open = exports_open, +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); +} + +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -220,6 +238,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -232,7 +251,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -317,6 +336,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if 
(size == 0) return -EINVAL; @@ -352,7 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(&init_net, dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -396,7 +416,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size > 0) { int newthreads; @@ -447,7 +467,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -510,7 +530,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { @@ -534,7 +554,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) @@ -792,7 +812,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, net); @@ -827,7 +847,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size > 0) { @@ -923,7 +943,8 @@ 
static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -939,7 +960,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -995,7 +1017,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1013,7 +1036,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1037,20 +1060,35 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + 
sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; #ifdef CONFIG_PROC_FS @@ -1061,7 +1099,8 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); + entry = proc_create("exports", 0, entry, + &exports_proc_operations); if (!entry) return -ENOMEM; return 0; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index be7af509930c..262df5ccbf59 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -652,7 +652,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. 
*/ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: @@ -703,8 +702,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv == NULL) { @@ -721,7 +719,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0889bfb43dc9..546f8983ecf1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, + struct dentry *dentry, __be32 **buffer, int countp, u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 6baadb5a8430..4bb21d67d9b1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; - struct hlist_node *p; int watched; if (!S_ISDIR(inode->i_mode)) @@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of 
the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index f31e90fc050d..74825be65b7b 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -36,12 +36,11 @@ static void fsnotify_recalc_inode_mask_locked(struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) new_mask |= mark->mask; inode->i_fsnotify_mask = new_mask; } @@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) void fsnotify_clear_marks_by_inode(struct inode *inode) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; LIST_HEAD(free_list); spin_lock(&inode->i_lock); - hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) { list_add(&mark->i.free_i_list, &free_list); hlist_del_init_rcu(&mark->i.i_list); fsnotify_get_mark(mark); @@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked( struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, int allow_dups) { - struct fsnotify_mark 
*lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { - last = node; + hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->i.i_list); + hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); out: fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 871569c7d609..4216308b81b4 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -197,7 +197,6 @@ static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); - idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); atomic_dec(&group->inotify_data.user->inotify_devs); free_uid(group->inotify_data.user); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 07f7a92fe88e..e0f7c1241a6a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -364,22 +364,20 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, { int ret; - do { - if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) - return -ENOMEM; + idr_preload(GFP_KERNEL); + spin_lock(idr_lock); - spin_lock(idr_lock); - ret = idr_get_new_above(idr, i_mark, *last_wd + 1, - &i_mark->wd); + ret = 
idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT); + if (ret >= 0) { /* we added the mark to the idr, take a reference */ - if (!ret) { - *last_wd = i_mark->wd; - fsnotify_get_mark(&i_mark->fsn_mark); - } - spin_unlock(idr_lock); - } while (ret == -EAGAIN); + i_mark->wd = ret; + *last_wd = i_mark->wd; + fsnotify_get_mark(&i_mark->fsn_mark); + } - return ret; + spin_unlock(idr_lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 4df58b8ea64a..68ca5a8704b5 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -33,12 +33,12 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; struct mount *m = real_mount(mnt); LIST_HEAD(free_list); spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) { list_add(&mark->m.free_m_list, &free_list); hlist_del_init_rcu(&mark->m.m_list); fsnotify_get_mark(mark); @@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) + hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) new_mask |= mark->mask; m->mnt_fsnotify_mask = new_mask; } @@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_ { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry(mark, 
&m->mnt_fsnotify_marks, m.m_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, int allow_dups) { struct mount *m = real_mount(mnt); - struct fsnotify_mark *lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) { - last = node; + hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->m.m_list); + hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); out: fsnotify_recalc_vfsmount_mask_locked(mnt); spin_unlock(&mnt->mnt_root->d_lock); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0d2bf566e39a..aa88bd8bcedc 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -304,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn) static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) { - int ret = 0; - - do { - if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) { - ret = -EAGAIN; - break; - } - spin_lock(&nn->nn_lock); - ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id); - if (ret == 0) - list_add_tail(&nsw->ns_node_item, - &nn->nn_status_list); - spin_unlock(&nn->nn_lock); - } while (ret == -EAGAIN); + int ret; - if (ret == 0) { - init_waitqueue_head(&nsw->ns_wq); - nsw->ns_sys_status = O2NET_ERR_NONE; - nsw->ns_status = 0; + spin_lock(&nn->nn_lock); + ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC); + if (ret 
>= 0) { + nsw->ns_id = ret; + list_add_tail(&nsw->ns_node_item, &nn->nn_status_list); } + spin_unlock(&nn->nn_lock); + if (ret < 0) + return ret; - return ret; + init_waitqueue_head(&nsw->ns_wq); + nsw->ns_sys_status = O2NET_ERR_NONE; + nsw->ns_status = 0; + return 0; } static void o2net_complete_nsw_locked(struct o2net_node *nn, diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 8db4b58b2e4b..ef999729e274 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { - struct hlist_node *p; struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 01ebfd0bdad7..eeac97bb3bfa 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, u8 dead_node, u8 new_master) { int i; - struct hlist_node *hash_iter; struct hlist_head *bucket; struct dlm_lock_resource *res, *next; @@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, * if necessary */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, hash_iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { if (!(res->state & DLM_LOCK_RES_RECOVERING)) continue; @@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) { - struct hlist_node *iter; struct dlm_lock_resource *res; int i; struct hlist_head *bucket; @@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct 
dlm_ctxt *dlm, u8 dead_node) */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { /* always prune any $RECOVERY entries for dead nodes, * otherwise hangs can occur during later recovery */ if (dlm_is_recovery_lock(res->lockname.name, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f169da4624fd..b7e74b580c0f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. 
*/ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b8afabfeede4..a36d0aa50911 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_allow_chain_relink; + int ac_disable_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 0ba9ea1e7961..2e3ea308c144 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir, struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); - if (!ret) { + if (ret) { mlog_errno(ret); goto leave; } diff --git a/fs/proc/base.c b/fs/proc/base.c index f3b133d79914..69078c7cef1f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -73,6 +73,7 @@ #include <linux/security.h> #include <linux/ptrace.h> #include <linux/tracehook.h> +#include <linux/printk.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -952,7 +953,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, * /proc/pid/oom_adj is provided for legacy purposes, ask users to use * /proc/pid/oom_score_adj instead. 
*/ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2983dc52ca25..4b3b3ffb52f1 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/printk.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/idr.h> @@ -132,11 +133,8 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes, } if (start == NULL) { - if (n > PAGE_SIZE) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > PAGE_SIZE) /* Apparent buffer overflow */ n = PAGE_SIZE; - } n -= *ppos; if (n <= 0) break; @@ -144,26 +142,19 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes, n = count; start = page + *ppos; } else if (start < page) { - if (n > PAGE_SIZE) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > PAGE_SIZE) /* Apparent buffer overflow */ n = PAGE_SIZE; - } if (n > count) { /* * Don't reduce n because doing so might * cut off part of a data block. */ - printk(KERN_WARNING - "proc_file_read: Read count exceeded\n"); + pr_warn("proc_file_read: count exceeded\n"); } } else /* start >= page */ { unsigned long startoff = (unsigned long)(start - page); - if (n > (PAGE_SIZE - startoff)) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > (PAGE_SIZE - startoff)) /* buffer overflow? 
*/ n = PAGE_SIZE - startoff; - } if (n > count) n = count; } @@ -569,7 +560,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { - WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", + WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); break; } @@ -830,9 +821,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); + WARN(de->subdir, "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 70322e1a4f0f..a86aebc9ba7c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -13,6 +13,7 @@ #include <linux/stat.h> #include <linux/completion.h> #include <linux/poll.h> +#include <linux/printk.h> #include <linux/file.h> #include <linux/limits.h> #include <linux/init.h> @@ -495,13 +496,13 @@ int proc_fill_super(struct super_block *s) pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); if (!root_inode) { - printk(KERN_ERR "proc_fill_super: get root inode failed\n"); + pr_err("proc_fill_super: get root inode failed\n"); return -ENOMEM; } s->s_root = d_make_root(root_inode); if (!s->s_root) { - printk(KERN_ERR "proc_fill_super: allocate dentry failed\n"); + pr_err("proc_fill_super: allocate dentry failed\n"); return -ENOMEM; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 252544c05207..85ff3a4598b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/proc_fs.h> +#include <linux/binfmts.h> struct ctl_table_header; struct 
mempolicy; @@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task) if (mm) dumpable = get_dumpable(mm); task_unlock(task); - if (dumpable == SUID_DUMPABLE_ENABLED) + if (dumpable == SUID_DUMP_USER) return 1; return 0; } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e96d4f18ca3a..eda6f017f272 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -17,6 +17,7 @@ #include <linux/elfcore.h> #include <linux/vmalloc.h> #include <linux/highmem.h> +#include <linux/printk.h> #include <linux/bootmem.h> #include <linux/init.h> #include <linux/slab.h> @@ -619,7 +620,7 @@ static int __init proc_kcore_init(void) proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); if (!proc_root_kcore) { - printk(KERN_ERR "couldn't create /proc/kcore\n"); + pr_err("couldn't create /proc/kcore\n"); return 0; /* Always returns 0. */ } /* Store text area if it's special */ diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de20ec480fa0..30b590f5bd35 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -8,6 +8,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/printk.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/of.h> @@ -110,8 +111,8 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, if (ent->data == oldprop) break; if (ent == NULL) { - printk(KERN_WARNING "device-tree: property \"%s\" " - " does not exist\n", oldprop->name); + pr_warn("device-tree: property \"%s\" does not exist\n", + oldprop->name); } else { ent->data = newprop; ent->size = newprop->length; @@ -153,8 +154,8 @@ static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, realloc: fixed_name = kmalloc(fixup_len, GFP_KERNEL); if (fixed_name == NULL) { - printk(KERN_ERR "device-tree: Out of memory trying to fixup " - "name \"%s\"\n", name); + pr_err("device-tree: Out of memory trying to fixup " + "name \"%s\"\n", name); return name; } @@ -175,8 
+176,8 @@ retry: goto retry; } - printk(KERN_WARNING "device-tree: Duplicate name in %s, " - "renamed to \"%s\"\n", np->full_name, fixed_name); + pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", + np->full_name, fixed_name); return fixed_name; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 612df79cc6a1..ac05f33a0dde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -5,6 +5,7 @@ #include <linux/sysctl.h> #include <linux/poll.h> #include <linux/proc_fs.h> +#include <linux/printk.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/namei.h> @@ -57,7 +58,7 @@ static void sysctl_print_dir(struct ctl_dir *dir) { if (dir->header.parent) sysctl_print_dir(dir->header.parent); - printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); + pr_cont("%s/", dir->header.ctl_table[0].procname); } static int namecmp(const char *name1, int len1, const char *name2, int len2) @@ -134,9 +135,9 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) else if (cmp > 0) p = &(*p)->rb_right; else { - printk(KERN_ERR "sysctl duplicate entry: "); + pr_err("sysctl duplicate entry: "); sysctl_print_dir(head->parent); - printk(KERN_CONT "/%s\n", entry->procname); + pr_cont("/%s\n", entry->procname); return -EEXIST; } } @@ -927,9 +928,9 @@ found: subdir->header.nreg++; failed: if (unlikely(IS_ERR(subdir))) { - printk(KERN_ERR "sysctl could not get directory: "); + pr_err("sysctl could not get directory: "); sysctl_print_dir(dir); - printk(KERN_CONT "/%*.*s %ld\n", + pr_cont("/%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); @@ -995,8 +996,8 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 
vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", - path, table->procname, &vaf); + pr_err("sysctl table check failed: %s/%s %pV\n", + path, table->procname, &vaf); va_end(args); return -EINVAL; @@ -1510,9 +1511,9 @@ static void put_links(struct ctl_table_header *header) drop_sysctl_table(link_head); } else { - printk(KERN_ERR "sysctl link missing during unregister: "); + pr_err("sysctl link missing during unregister: "); sysctl_print_dir(parent); - printk(KERN_CONT "/%s\n", name); + pr_cont("/%s\n", name); } } } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0d5071d29985..b870f740ab5a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -15,6 +15,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/printk.h> #include <linux/bootmem.h> #include <linux/init.h> #include <linux/crash_dump.h> @@ -175,15 +176,15 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); if (!curr_m) return -EINVAL; - if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) - tsz = buflen; - - /* Calculate left bytes in current memory segment. */ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; while (buflen) { + tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); + + /* Calculate left bytes in current memory segment. */ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + tmp = read_from_oldmem(buffer, tsz, &start, 1); if (tmp < 0) return tmp; @@ -198,12 +199,6 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, struct vmcore, list); start = curr_m->paddr; } - if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) - tsz = buflen; - /* Calculate left bytes in current memory segment. 
*/ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; } return acc; } @@ -553,8 +548,7 @@ static int __init parse_crash_elf64_headers(void) ehdr.e_ehsize != sizeof(Elf64_Ehdr) || ehdr.e_phentsize != sizeof(Elf64_Phdr) || ehdr.e_phnum == 0) { - printk(KERN_WARNING "Warning: Core image elf header is not" - "sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } @@ -609,8 +603,7 @@ static int __init parse_crash_elf32_headers(void) ehdr.e_ehsize != sizeof(Elf32_Ehdr) || ehdr.e_phentsize != sizeof(Elf32_Phdr) || ehdr.e_phnum == 0) { - printk(KERN_WARNING "Warning: Core image elf header is not" - "sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } @@ -653,8 +646,7 @@ static int __init parse_crash_elf_headers(void) if (rc < 0) return rc; if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { - printk(KERN_WARNING "Warning: Core image elf header" - " not found\n"); + pr_warn("Warning: Core image elf header not found\n"); return -EINVAL; } @@ -673,8 +665,7 @@ static int __init parse_crash_elf_headers(void) /* Determine vmcore size. 
*/ vmcore_size = get_vmcore_size_elf32(elfcorebuf); } else { - printk(KERN_WARNING "Warning: Core image elf header is not" - " sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } return 0; @@ -690,7 +681,7 @@ static int __init vmcore_init(void) return rc; rc = parse_crash_elf_headers(); if (rc) { - printk(KERN_WARNING "Kdump: vmcore not initialized\n"); + pr_warn("Kdump: vmcore not initialized\n"); return rc; } diff --git a/fs/seq_file.c b/fs/seq_file.c index f2bc3dfd0b88..15c6304bab71 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -308,27 +308,27 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) mutex_lock(&m->lock); m->version = file->f_version; switch (whence) { - case 1: - offset += file->f_pos; - case 0: - if (offset < 0) - break; - retval = offset; - if (offset != m->read_pos) { - while ((retval=traverse(m, offset)) == -EAGAIN) - ; - if (retval) { - /* with extreme prejudice... */ - file->f_pos = 0; - m->read_pos = 0; - m->version = 0; - m->index = 0; - m->count = 0; - } else { - m->read_pos = offset; - retval = file->f_pos = offset; - } + case SEEK_CUR: + offset += file->f_pos; + case SEEK_SET: + if (offset < 0) + break; + retval = offset; + if (offset != m->read_pos) { + while ((retval = traverse(m, offset)) == -EAGAIN) + ; + if (retval) { + /* with extreme prejudice... 
*/ + file->f_pos = 0; + m->read_pos = 0; + m->version = 0; + m->index = 0; + m->count = 0; + } else { + m->read_pos = offset; + retval = file->f_pos = offset; } + } } file->f_version = m->version; mutex_unlock(&m->lock); diff --git a/fs/super.c b/fs/super.c index 12f123712161..7465d4364208 100644 --- a/fs/super.c +++ b/fs/super.c @@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type, void *data) { struct super_block *s = NULL; - struct hlist_node *node; struct super_block *old; int err; retry: spin_lock(&sb_lock); if (test) { - hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (!grab_super(old)) @@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type, void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; - struct hlist_node *node; spin_lock(&sb_lock); - hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(sb, &type->fs_supers, s_instances) { sb->s_count++; spin_unlock(&sb_lock); @@ -842,7 +840,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { + if (dev == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 2ce9a5db6ab5..15c68f9489ae 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -461,14 +461,13 @@ const struct file_operations bin_fops = { void unmap_bin_file(struct sysfs_dirent *attr_sd) { struct bin_buffer *bb; - struct hlist_node *tmp; if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) return; mutex_lock(&sysfs_bin_lock); - hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { + hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { struct inode *inode = file_inode(bb->file); unmap_mapping_range(inode->i_mapping, 0, 0, 1); diff --git 
a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 96fcbb85ff83..d1dba7ce75ae 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1442,9 +1442,8 @@ xlog_recover_find_tid( xlog_tid_t tid) { xlog_recover_t *trans; - struct hlist_node *n; - hlist_for_each_entry(trans, n, head, r_list) { + hlist_for_each_entry(trans, head, r_list) { if (trans->r_log_tid == tid) return trans; } |