summaryrefslogtreecommitdiff
path: root/fs/ceph/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r--fs/ceph/super.c251
1 files changed, 167 insertions, 84 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f888cf487b7..9922628532b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -2,20 +2,18 @@
#include "ceph_debug.h"
#include <linux/backing-dev.h>
+#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/parser.h>
-#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>
-#include <linux/version.h>
-#include <linux/vmalloc.h>
#include "decode.h"
#include "super.h"
@@ -47,10 +45,20 @@ const char *ceph_file_part(const char *s, int len)
*/
static void ceph_put_super(struct super_block *s)
{
- struct ceph_client *cl = ceph_client(s);
+ struct ceph_client *client = ceph_sb_to_client(s);
dout("put_super\n");
- ceph_mdsc_close_sessions(&cl->mdsc);
+ ceph_mdsc_close_sessions(&client->mdsc);
+
+ /*
+ * ensure we release the bdi before put_anon_super releases
+ * the device name.
+ */
+ if (s->s_bdi == &client->backing_dev_info) {
+ bdi_unregister(&client->backing_dev_info);
+ s->s_bdi = NULL;
+ }
+
return;
}
@@ -82,7 +90,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -94,15 +102,52 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
}
-static int ceph_syncfs(struct super_block *sb, int wait)
+static int ceph_sync_fs(struct super_block *sb, int wait)
{
- dout("sync_fs %d\n", wait);
- ceph_osdc_sync(&ceph_client(sb)->osdc);
- ceph_mdsc_sync(&ceph_client(sb)->mdsc);
- dout("sync_fs %d done\n", wait);
+ struct ceph_client *client = ceph_sb_to_client(sb);
+
+ if (!wait) {
+ dout("sync_fs (non-blocking)\n");
+ ceph_flush_dirty_caps(&client->mdsc);
+ dout("sync_fs (non-blocking) done\n");
+ return 0;
+ }
+
+ dout("sync_fs (blocking)\n");
+ ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
+ ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
+ dout("sync_fs (blocking) done\n");
return 0;
}
+static int default_congestion_kb(void)
+{
+ int congestion_kb;
+
+ /*
+ * Copied from NFS
+ *
+ * congestion size, scale with available memory.
+ *
+ * 64MB: 8192k
+ * 128MB: 11585k
+ * 256MB: 16384k
+ * 512MB: 23170k
+ * 1GB: 32768k
+ * 2GB: 46340k
+ * 4GB: 65536k
+ * 8GB: 92681k
+ * 16GB: 131072k
+ *
+ * This allows larger machines to have larger/more transfers.
+ * Limit the default to 256M
+ */
+ congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+ if (congestion_kb > 256*1024)
+ congestion_kb = 256*1024;
+
+ return congestion_kb;
+}
/**
* ceph_show_options - Show mount options in /proc/mounts
@@ -115,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
struct ceph_mount_args *args = client->mount_args;
if (args->flags & CEPH_OPT_FSID)
- seq_printf(m, ",fsidmajor=%llu,fsidminor%llu",
- le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]),
- le64_to_cpu(*(__le64 *)&args->fsid.fsid[8]));
+ seq_printf(m, ",fsid=%pU", &args->fsid);
if (args->flags & CEPH_OPT_NOSHARE)
seq_puts(m, ",noshare");
if (args->flags & CEPH_OPT_DIRSTAT)
@@ -128,6 +171,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_puts(m, ",nocrc");
if (args->flags & CEPH_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
+
+ if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
+ if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
+ if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
+ seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
+ if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, ",osdkeepalivetimeout=%d",
+ args->osd_keepalive_timeout);
+ if (args->wsize)
+ seq_printf(m, ",wsize=%d", args->wsize);
+ if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
+ seq_printf(m, ",rsize=%d", args->rsize);
+ if (args->congestion_kb != default_congestion_kb())
+ seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
+ if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_min=%d",
+ args->caps_wanted_delay_min);
+ if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_max=%d",
+ args->caps_wanted_delay_max);
+ if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
+ seq_printf(m, ",cap_release_safety=%d",
+ args->cap_release_safety);
+ if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
+ seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
+ if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
+ seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_printf(m, ",snapdirname=%s", args->snapdir_name);
if (args->name)
@@ -151,35 +223,6 @@ static void ceph_inode_init_once(void *foo)
inode_init_once(&ci->vfs_inode);
}
-static int default_congestion_kb(void)
-{
- int congestion_kb;
-
- /*
- * Copied from NFS
- *
- * congestion size, scale with available memory.
- *
- * 64MB: 8192k
- * 128MB: 11585k
- * 256MB: 16384k
- * 512MB: 23170k
- * 1GB: 32768k
- * 2GB: 46340k
- * 4GB: 65536k
- * 8GB: 92681k
- * 16GB: 131072k
- *
- * This allows larger machines to have larger/more transfers.
- * Limit the default to 256M
- */
- congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
- if (congestion_kb > 256*1024)
- congestion_kb = 256*1024;
-
- return congestion_kb;
-}
-
static int __init init_caches(void)
{
ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
@@ -244,7 +287,7 @@ static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
- .sync_fs = ceph_syncfs,
+ .sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
@@ -287,9 +330,6 @@ const char *ceph_msg_type_name(int type)
* mount options
*/
enum {
- Opt_fsidmajor,
- Opt_fsidminor,
- Opt_monport,
Opt_wsize,
Opt_rsize,
Opt_osdtimeout,
@@ -298,10 +338,13 @@ enum {
Opt_osd_idle_ttl,
Opt_caps_wanted_delay_min,
Opt_caps_wanted_delay_max,
+ Opt_cap_release_safety,
Opt_readdir_max_entries,
+ Opt_readdir_max_bytes,
Opt_congestion_kb,
Opt_last_int,
/* int args above */
+ Opt_fsid,
Opt_snapdirname,
Opt_name,
Opt_secret,
@@ -318,9 +361,6 @@ enum {
};
static match_table_t arg_tokens = {
- {Opt_fsidmajor, "fsidmajor=%ld"},
- {Opt_fsidminor, "fsidminor=%ld"},
- {Opt_monport, "monport=%d"},
{Opt_wsize, "wsize=%d"},
{Opt_rsize, "rsize=%d"},
{Opt_osdtimeout, "osdtimeout=%d"},
@@ -329,9 +369,12 @@ static match_table_t arg_tokens = {
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
+ {Opt_cap_release_safety, "cap_release_safety=%d"},
{Opt_readdir_max_entries, "readdir_max_entries=%d"},
+ {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
{Opt_congestion_kb, "write_congestion_kb=%d"},
/* int args above */
+ {Opt_fsid, "fsid=%s"},
{Opt_snapdirname, "snapdirname=%s"},
{Opt_name, "name=%s"},
{Opt_secret, "secret=%s"},
@@ -347,6 +390,36 @@ static match_table_t arg_tokens = {
{-1, NULL}
};
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+ int i = 0;
+ char tmp[3];
+ int err = -EINVAL;
+ int d;
+
+ dout("parse_fsid '%s'\n", str);
+ tmp[2] = 0;
+ while (*str && i < 16) {
+ if (ispunct(*str)) {
+ str++;
+ continue;
+ }
+ if (!isxdigit(str[0]) || !isxdigit(str[1]))
+ break;
+ tmp[0] = str[0];
+ tmp[1] = str[1];
+ if (sscanf(tmp, "%x", &d) < 1)
+ break;
+ fsid->fsid[i] = d & 0xff;
+ i++;
+ str += 2;
+ }
+
+ if (i == 16)
+ err = 0;
+ dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ return err;
+}
static struct ceph_mount_args *parse_mount_args(int flags, char *options,
const char *dev_name,
@@ -378,8 +451,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
- args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4;
- args->max_readdir = 1024;
+ args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+ args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+ args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
args->congestion_kb = default_congestion_kb();
/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
@@ -429,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
dout("got token %d\n", token);
}
switch (token) {
- case Opt_fsidmajor:
- *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval);
- break;
- case Opt_fsidminor:
- *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval);
- break;
case Opt_ip:
err = ceph_parse_ips(argstr[0].from,
argstr[0].to,
@@ -445,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
args->flags |= CEPH_OPT_MYIP;
break;
+ case Opt_fsid:
+ err = parse_fsid(argstr[0].from, &args->fsid);
+ if (err == 0)
+ args->flags |= CEPH_OPT_FSID;
+ break;
case Opt_snapdirname:
kfree(args->snapdir_name);
args->snapdir_name = kstrndup(argstr[0].from,
@@ -475,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
case Opt_osdkeepalivetimeout:
args->osd_keepalive_timeout = intval;
break;
+ case Opt_osd_idle_ttl:
+ args->osd_idle_ttl = intval;
+ break;
case Opt_mount_timeout:
args->mount_timeout = intval;
break;
@@ -487,6 +563,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
case Opt_readdir_max_entries:
args->max_readdir = intval;
break;
+ case Opt_readdir_max_bytes:
+ args->max_readdir_bytes = intval;
+ break;
case Opt_congestion_kb:
args->congestion_kb = intval;
break;
@@ -587,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args)
/* caps */
client->min_caps = args->max_readdir;
- ceph_adjust_min_caps(client->min_caps);
/* subsystems */
err = ceph_monc_init(&client->monc, client);
@@ -626,16 +704,24 @@ static void ceph_destroy_client(struct ceph_client *client)
/* unmount */
ceph_mdsc_stop(&client->mdsc);
- ceph_monc_stop(&client->monc);
ceph_osdc_stop(&client->osdc);
- ceph_adjust_min_caps(-client->min_caps);
+ /*
+ * make sure mds and osd connections close out before destroying
+ * the auth module, which is needed to free those connections'
+ * ceph_authorizers.
+ */
+ ceph_msgr_flush();
+
+ ceph_monc_stop(&client->monc);
ceph_debugfs_client_cleanup(client);
destroy_workqueue(client->wb_wq);
destroy_workqueue(client->pg_inv_wq);
destroy_workqueue(client->trunc_wq);
+ bdi_destroy(&client->backing_dev_info);
+
if (client->msgr)
ceph_messenger_destroy(client->msgr);
mempool_destroy(client->wb_pagevec_pool);
@@ -653,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
{
if (client->have_fsid) {
if (ceph_fsid_compare(&client->fsid, fsid)) {
- pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT,
- PR_FSID(&client->fsid), PR_FSID(fsid));
+ pr_err("bad fsid, had %pU got %pU",
+ &client->fsid, fsid);
return -1;
}
} else {
- pr_info("client%lld fsid " FSID_FORMAT "\n",
- client->monc.auth->global_id, PR_FSID(fsid));
+ pr_info("client%lld fsid %pU\n", client->monc.auth->global_id,
+ fsid);
memcpy(&client->fsid, fsid, sizeof(*fsid));
ceph_debugfs_client_init(client);
client->have_fsid = true;
@@ -670,9 +756,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
/*
* true if we have the mon map (and have thus joined the cluster)
*/
-static int have_mon_map(struct ceph_client *client)
+static int have_mon_and_osd_map(struct ceph_client *client)
{
- return client->monc.monmap && client->monc.monmap->epoch;
+ return client->monc.monmap && client->monc.monmap->epoch &&
+ client->osdc.osdmap && client->osdc.osdmap->epoch;
}
/*
@@ -692,7 +779,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
dout("open_root_inode opening '%s'\n", path);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
@@ -750,7 +837,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
if (err < 0)
goto out;
- while (!have_mon_map(client)) {
+ while (!have_mon_and_osd_map(client)) {
err = -EIO;
if (timeout && time_after_eq(jiffies, started + timeout))
goto out;
@@ -758,8 +845,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
/* wait */
dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq,
- have_mon_map(client) || (client->auth_err < 0),
- timeout);
+ have_mon_and_osd_map(client) || (client->auth_err < 0),
+ timeout);
if (err == -EINTR || err == -ERESTARTSYS)
goto out;
if (client->auth_err < 0) {
@@ -872,18 +959,21 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
int err;
- sb->s_bdi = &client->backing_dev_info;
-
/* set ra_pages based on rsize mount option? */
if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
client->backing_dev_info.ra_pages =
(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
- err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
+ err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
+ atomic_long_inc_return(&bdi_seq));
+ if (!err)
+ sb->s_bdi = &client->backing_dev_info;
return err;
}
@@ -920,9 +1010,9 @@ static int ceph_get_sb(struct file_system_type *fs_type,
goto out;
}
- if (ceph_client(sb) != client) {
+ if (ceph_sb_to_client(sb) != client) {
ceph_destroy_client(client);
- client = ceph_client(sb);
+ client = ceph_sb_to_client(sb);
dout("get_sb got existing client %p\n", client);
} else {
dout("get_sb using new client %p\n", client);
@@ -940,8 +1030,7 @@ static int ceph_get_sb(struct file_system_type *fs_type,
out_splat:
ceph_mdsc_close_sessions(&client->mdsc);
- up_write(&sb->s_umount);
- deactivate_super(sb);
+ deactivate_locked_super(sb);
goto out_final;
out:
@@ -957,9 +1046,6 @@ static void ceph_kill_sb(struct super_block *s)
dout("kill_sb %p\n", s);
ceph_mdsc_pre_umount(&client->mdsc);
kill_anon_super(s); /* will call put_super after sb is r/o */
- if (s->s_bdi == &client->backing_dev_info)
- bdi_unregister(&client->backing_dev_info);
- bdi_destroy(&client->backing_dev_info);
ceph_destroy_client(client);
}
@@ -990,8 +1076,6 @@ static int __init init_ceph(void)
if (ret)
goto out_msgr;
- ceph_caps_init();
-
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_icache;
@@ -1016,7 +1100,6 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
- ceph_caps_finalize();
destroy_caches();
ceph_msgr_exit();
ceph_debugfs_cleanup();