/* * device-discovery.c: main function, discovering device and processing * pipe request from kernel. * * Copyright (c) 2010 EMC Corporation, Haiying Tang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "device-discovery.h" #define EVENT_SIZE (sizeof(struct inotify_event)) #define EVENT_BUFSIZE (1024 * EVENT_SIZE) #define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/nfs/blocklayout" #define NFSPIPE_DIR "/var/lib/nfs/rpc_pipefs/nfs" #define RPCPIPE_DIR "/var/lib/nfs/rpc_pipefs" #define PID_FILE "/var/run/blkmapd.pid" struct bl_disk *visible_disk_list; int bl_watch_fd, bl_pipe_fd, nfs_pipedir_wfd, rpc_pipedir_wfd; int pidfd = -1; struct bl_disk_path *bl_get_path(const char *filepath, struct bl_disk_path *paths) { struct bl_disk_path *tmp = paths; while (tmp) { if (!strcmp(tmp->full_path, filepath)) break; tmp = tmp->next; } return tmp; } /* * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO, * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to * create pseudo device. So if state is higher, the device path needs to * be updated. * If device-mapper multipath support is a must, pseudo devices should * exist for each multipath device. If not, active device path will be * chosen for device creation. */ int bl_update_path(enum bl_path_state_e state, struct bl_disk *disk) { struct bl_disk_path *valid_path = disk->valid_path; if (valid_path && valid_path->state >= state) return 0; return 1; } void bl_release_disk(void) { struct bl_disk *disk; struct bl_disk_path *path = NULL; while (visible_disk_list) { disk = visible_disk_list; path = disk->paths; while (path) { disk->paths = path->next; free(path->full_path); free(path); path = disk->paths; } if (disk->serial) free(disk->serial); visible_disk_list = disk->next; free(disk); } } void bl_add_disk(char *filepath) { struct bl_disk *disk = NULL; int fd = 0; struct stat sb; off_t size = 0; struct bl_serial *serial = NULL; enum bl_path_state_e ap_state; struct bl_disk_path *diskpath = NULL, *path = NULL; dev_t dev; fd = open(filepath, O_RDONLY | O_LARGEFILE); if (fd < 0) return; if (fstat(fd, &sb)) { close(fd); return; } if (!sb.st_size) ioctl(fd, BLKGETSIZE, &size); else size = sb.st_size; if (!size) { close(fd); return; } dev = sb.st_rdev; serial = bldev_read_serial(fd, filepath); if (!serial) { BL_LOG_ERR("%s: no serial found for %s\n", __func__, filepath); ap_state = BL_PATH_STATE_PASSIVE; } else if (dm_is_dm_major(major(dev))) ap_state = BL_PATH_STATE_PSEUDO; else ap_state = bldev_read_ap_state(fd); close(fd); for (disk = visible_disk_list; disk != NULL; disk = disk->next) { /* Already scanned or a partition? * XXX: if released each time, maybe not need to compare */ if ((serial->len == disk->serial->len) && !memcmp(serial->data, disk->serial->data, serial->len)) { diskpath = bl_get_path(filepath, disk->paths); break; } } if (disk && diskpath) return; /* add path */ path = malloc(sizeof(struct bl_disk_path)); if (!path) { BL_LOG_ERR("%s: Out of memory!\n", __func__); goto out_err; } path->next = NULL; path->state = ap_state; path->full_path = strdup(filepath); if (!path->full_path) goto out_err; if (!disk) { /* add disk */ disk = malloc(sizeof(struct bl_disk)); if (!disk) { BL_LOG_ERR("%s: Out of memory!\n", __func__); goto out_err; } disk->next = visible_disk_list; disk->dev = dev; disk->size = size; disk->serial = serial; disk->valid_path = path; disk->paths = path; visible_disk_list = disk; } else { path->next = disk->paths; disk->paths = path; /* check whether we need to update disk info */ if (bl_update_path(path->state, disk)) { disk->dev = dev; disk->size = size; disk->valid_path = path; } } return; out_err: if (path) { if (path->full_path) free(path->full_path); free(path); } return; } int bl_discover_devices(void) { FILE *f; int n; char buf[PATH_MAX], devname[PATH_MAX], fulldevname[PATH_MAX]; /* release previous list */ bl_release_disk(); /* scan all block devices */ f = fopen("/proc/partitions", "r"); if (f == NULL) return 0; while (1) { if (fgets(buf, sizeof buf, f) == NULL) break; n = sscanf(buf, "%*d %*d %*d %31s", devname); if (n != 1) continue; snprintf(fulldevname, sizeof fulldevname, "/sys/block/%s", devname); if (access(fulldevname, F_OK) < 0) continue; snprintf(fulldevname, sizeof fulldevname, "/dev/%s", devname); bl_add_disk(fulldevname); } fclose(f); return 0; } /* process kernel request * return 0: request processed, and no more request waiting; * return 1: request processed, and more requests waiting; * return < 0: error */ static int bl_disk_inquiry_process(int fd) { int ret = 0; struct bl_pipemsg_hdr head; char *buf = NULL; uint32_t major, minor; uint16_t buflen; struct bl_dev_msg reply; /* read request */ if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) { /* Note that an error in this or the next read is pretty * catastrophic, as there is no good way to resync into * the pipe's stream. */ BL_LOG_ERR("Read pipefs head error!\n"); ret = -EIO; goto out; } buflen = head.totallen; buf = malloc(buflen); if (!buf) { BL_LOG_ERR("%s: Out of memory!\n", __func__); ret = -ENOMEM; goto out; } if (atomicio(read, fd, buf, buflen) != buflen) { BL_LOG_ERR("Read pipefs content error!\n"); ret = -EIO; goto out; } reply.status = BL_DEVICE_REQUEST_PROC; switch (head.type) { case BL_DEVICE_MOUNT: /* * It shouldn't be necessary to discover devices here, since * process_deviceinfo() will re-discover if it can't find * the devices it needs. But in the case of multipath * devices (ones that appear more than once, for example an * active and a standby LUN), this will re-order them in the * correct priority. */ bl_discover_devices(); if (!process_deviceinfo(buf, buflen, &major, &minor)) { reply.status = BL_DEVICE_REQUEST_ERR; break; } reply.major = major; reply.minor = minor; break; case BL_DEVICE_UMOUNT: if (!dm_device_remove_all((uint64_t *) buf)) reply.status = BL_DEVICE_REQUEST_ERR; break; default: reply.status = BL_DEVICE_REQUEST_ERR; break; } /* write to pipefs */ if (atomicio((void *)write, fd, &reply, sizeof(reply)) != sizeof(reply)) { BL_LOG_ERR("Write pipefs error!\n"); ret = -EIO; } out: if (buf) free(buf); return ret; } static void bl_watch_dir(const char* dir, int *wd) { *wd = inotify_add_watch(bl_watch_fd, dir, IN_CREATE|IN_DELETE); if (*wd < 0) BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno)); } static void bl_rpcpipe_cb(void) { int rc, curr_byte = 0; char eventArr[EVENT_BUFSIZE]; struct inotify_event *event; rc = read(bl_watch_fd, &eventArr, EVENT_BUFSIZE); if (rc < 0) BL_LOG_ERR("read event fail: %s", strerror(errno)); while (rc > curr_byte) { event = (struct inotify_event *)&eventArr[curr_byte]; curr_byte += EVENT_SIZE + event->len; if (event->wd == rpc_pipedir_wfd) { if (strncmp(event->name, "nfs", 3)) continue; if (event->mask & IN_CREATE) { BL_LOG_WARNING("nfs pipe dir created\n"); bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd); bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR); } else if (event->mask & IN_DELETE) { BL_LOG_WARNING("nfs pipe dir deleted\n"); inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd); close(bl_pipe_fd); nfs_pipedir_wfd = -1; bl_pipe_fd = -1; } } else if (event->wd == nfs_pipedir_wfd) { if (strncmp(event->name, "blocklayout", 11)) continue; if (event->mask & IN_CREATE) { BL_LOG_WARNING("blocklayout pipe file created\n"); bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR); if (bl_pipe_fd < 0) BL_LOG_ERR("open %s failed: %s\n", event->name, strerror(errno)); } else if (event->mask & IN_DELETE) { BL_LOG_WARNING("blocklayout pipe file deleted\n"); close(bl_pipe_fd); bl_pipe_fd = -1; } } } } static int bl_event_helper(void) { fd_set rset; int ret = 0, maxfd; for (;;) { FD_ZERO(&rset); FD_SET(bl_watch_fd, &rset); if (bl_pipe_fd > 0) FD_SET(bl_pipe_fd, &rset); maxfd = (bl_watch_fd>bl_pipe_fd)?bl_watch_fd:bl_pipe_fd; switch (select(maxfd + 1, &rset, NULL, NULL, NULL)) { case -1: if (errno == EINTR) continue; else { ret = -errno; goto out; } case 0: goto out; default: if (FD_ISSET(bl_watch_fd, &rset)) bl_rpcpipe_cb(); else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &rset)) ret = bl_disk_inquiry_process(bl_pipe_fd); if (ret) goto out; } } out: return ret; } void sig_die(int signal) { if (pidfd >= 0) { close(pidfd); unlink(PID_FILE); } BL_LOG_ERR("exit on signal(%d)\n", signal); exit(1); } /* Daemon */ int main(int argc, char **argv) { int opt, dflag = 0, fg = 0, ret = 1; struct stat statbuf; char pidbuf[64]; while ((opt = getopt(argc, argv, "df")) != -1) { switch (opt) { case 'd': dflag = 1; break; case 'f': fg = 1; break; } } if (fg) { openlog("blkmapd", LOG_PERROR, 0); } else { if (!stat(PID_FILE, &statbuf)) { fprintf(stderr, "Pid file %s already existed\n", PID_FILE); exit(1); } if (daemon(0, 0) != 0) { fprintf(stderr, "Daemonize failed\n"); exit(1); } openlog("blkmapd", LOG_PID, 0); pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644); if (pidfd < 0) { BL_LOG_ERR("Create pid file %s failed\n", PID_FILE); exit(1); } if (lockf(pidfd, F_TLOCK, 0) < 0) { BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE); close(pidfd); exit(1); } ftruncate(pidfd, 0); sprintf(pidbuf, "%d\n", getpid()); write(pidfd, pidbuf, strlen(pidbuf)); } signal(SIGINT, sig_die); signal(SIGTERM, sig_die); signal(SIGHUP, SIG_IGN); if (dflag) { bl_discover_devices(); exit(0); } if ((bl_watch_fd = inotify_init()) < 0) { BL_LOG_ERR("init inotify failed %s\n", strerror(errno)); exit(1); } /* open pipe file */ bl_watch_dir(RPCPIPE_DIR, &rpc_pipedir_wfd); bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd); bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR); if (bl_pipe_fd < 0) BL_LOG_ERR("open pipe file %s failed: %s\n", BL_PIPE_FILE, strerror(errno)); while (1) { /* discover device when needed */ bl_discover_devices(); ret = bl_event_helper(); if (ret < 0) { /* what should we do with process error? */ BL_LOG_ERR("inquiry process return %d\n", ret); } } if (pidfd >= 0) { close(pidfd); unlink(PID_FILE); } exit(ret); }