ref: 3971bc6b760a0658b27d0dd0f46693129ff59164
parent: d42b5ae9b1b41d92fb88cffa6bdb930c2b476570
author: Noam Preil <noam@pixelhero.dev>
date: Mon Feb 26 13:01:45 EST 2024
remove partition
--- a/partitioner/design
+++ /dev/null
@@ -1,53 +1,0 @@
-Needs to:
-
-* Be given a list of disks
- * Raw disks, partitions, files
- * Disks can be partitioned
- * Files and partitions must be used as a full partition each
- * Disks may be empty
- * Can partition, or use whole disk as one region
- * Disks may contain partitions and empty space
- * In which case, may want to use part of the disk
- * Replace existing partitions
- * Use unused space
- * If neither, unusable
- * Or realize it's the wrong disk
-
-* Be given desired constraints
- * How many fossils, in what size range?
-
-* Determine usable regions
- * Size
- * Disk
- * Offset
- * Redundancy
- * Sequential performance
- * Random-access performance
-
-* Determine what to do with each region within constraints
-v1:
- * Have bloom filter if there's enough space
- * Allocate five fossils, each 1% of total space
- * Statically size index and arenas as percentages of the total remaining space
- * Create one index section, one arena partition, bloom filter, and fossils
- * Try to give index minimal random read latency, and arena maximum sequential read speed
- * No interactivity
-
-v2:
- * Give the user the ability to edit the plan before executing it
-
-v3:
- * Talk with /dev/fs, take into account redundancy
- * Create multiple index sections and arena partitions, as needed
- * In order of highest to lowest priority:
- * Try not to use arena disks for anything else
- * Try not to use fossil disks for anything else
- * Try not to use index disks for anything else
-
-
-* Create the partitions
-* Format the partitions
-* Generate the venti configuration file
-
-
-
--- a/partitioner/linux.c
+++ /dev/null
@@ -1,74 +1,0 @@
-#include "platform.h"
-#include "partitioner.h"
-#include <stdio.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-
-static const char *types[] = { "unknown", "bad", "disk", "partition", "file", };
-
-/* Resolves partition to disk offset */
-static void
-probepart(Region *r)
-{
- char *startpath = aprintf("/sys/class/block/%s/start", r->uuid);
- char *str = readfile(startpath, NULL);
- free(startpath);
- r->offset = atoll(str);
- free(str);
- r->type = disk;
-}
-
-static char*
-extract(char *file, char *key)
-{
- char *val = strstr(file, key);
- val += strlen(key) + 1;
- return strndup(val, strchr(val, '\n') - val);
-}
-
-static void
-probeblock(Region *r, struct stat buf)
-{
- char *devpath = aprintf("/sys/dev/block/%d:%d/uevent", major(buf.st_rdev), minor(buf.st_rdev));
- char *contents = readfile(devpath, NULL);
- char *devtype;
- free(devpath);
- devtype = extract(contents, "DEVTYPE");
- r->uuid = extract(contents, "DEVNAME");
- free(contents);
- r->type = streql(devtype, "disk") ? disk : part;
- if(r->type == part)
- probepart(r);
-}
-
-void
-proberegiontype(Region *r)
-{
- struct stat buf;
- if(stat(r->path, &buf) != 0 || !(buf.st_mode & (S_IFMT | S_IFBLK)))
- r->type = bad;
- else if(buf.st_mode & S_IFREG)
- r->type = file;
- else
- probeblock(r, buf);
-}
-
-void
-proberegionredundancy(Region *r)
-{
- /* TODO: integrate with mdadm? LUKS? cryptsetup? All of the above? */
- (void)r;
-}
-
-int
-ispartofdisk(Region *maybedisk, Region *maybepart)
-{
- if(maybedisk->type != disk || maybepart->type != disk)
- return 0;
- char *partpath = aprintf("/sys/class/block/%s/%s/partition", maybedisk->uuid, maybepart->uuid);
- char *contents = readfile(partpath, NULL);
- free(partpath);
- free(contents);
- return contents != NULL;
-}
-
--- a/partitioner/main.c
+++ /dev/null
@@ -1,360 +1,0 @@
-#include "platform.h"
-#include "partitioner.h"
-#include <stdio.h>
-
-static const char *types[] = { "unknown", "bad", "disk", "partition", "file", };
-static const char *uses[] = { "undecided","none","arena","index","bloom","index and bloom", "fossil" };
-
-const char *usage = "%s [disk1] [disk2] [...diskn]";
-
-int ispartofdisk(Region *disk, Region *maybepart);
-
-static struct {
- int usebloom;
- size_t indexsize, arenasize;
- size_t regioncount;
- Region *regions;
- size_t fossilcount;
- size_t fossilsize;
- int *fds;
-} state = {
- .usebloom = 0,
-};
-
-static void
-parseargs(int argc, char **argv)
-{
- size_t offset = 1;
- state.fossilcount = 0;
- state.fossilsize = 10*(size_t)1024*1024*1024;
- state.regioncount = argc - 1;
- state.regions = calloc(sizeof(Region), state.regioncount);
- for(int i = 1; i < argc; i += 1){
- if(streql(argv[i], "-f")){
- if(state.fossilcount == 5)
- sysfatal("Cannot give -f multiple times");
- state.fossilcount = 5;
- offset = 2;
- continue;
- }
- state.regions[i - offset].path = argv[i];
- }
- state.regioncount = argc - offset;
-}
-
-static void
-roundup(size_t *val, size_t base)
-{
- size_t abovebase = *val % base;
- if(abovebase == 0)
- return;
- /* The number is *not* a multiple of the base. Removing the modulus reduces to the
- * prior multiple; adding the base then moves it to the next one. */
- *val = *val - abovebase + base;
-}
-
-/* If directory containing 'ctl', disk. Otherwise, raw file [or partition, they're treated identically] */
-static void
-opendisks(void)
-{
- printf("Probing disk presence...\n");
- for(size_t i = 0; i < state.regioncount; i += 1){
- state.regions[i].fd = open(state.regions[i].path, OREAD | __O_DIRECT);
- state.regions[i].size = fdsize(state.regions[i].fd);
- }
-}
-
-static void
-removeregion(int(*check)(size_t index), char *hint)
-{
- size_t newcount = 0;
- size_t i;
- Region *newregions = calloc(sizeof(Region), state.regioncount);
- if(newregions == NULL)
- sysfatal("Insufficient memory");
- for(i = 0; i < state.regioncount; i += 1)
- if(!check(i)){
- newregions[newcount] = state.regions[i];
- newcount += 1;
- } else{
- printf("Eliding %s, which is %s\n", state.regions[i].path, hint);
- }
- state.regioncount = newcount;
- free(state.regions);
- state.regions = newregions;
-}
-
-static int
-istoosmall(size_t index)
-{
- return state.regions[index].size < MinSize || state.regions[index].size == (size_t)-1;
-}
-
-static int
-isclosed(size_t index)
-{
- return state.regions[index].fd < 0;
-}
-
-static int
-isunused(size_t index)
-{
- return state.regions[index].type == bad;
-}
-
-static int
-isduplicate(size_t index)
-{
- for(size_t i = 0; i < state.regioncount; i += 1){
- if(i == index)
- continue;
- if(streql(state.regions[index].path, state.regions[i].path))
- return 1;
- }
- return 0;
-}
-
-static int
-issubregion(size_t index)
-{
- for(size_t i = 0; i < state.regioncount; i += 1)
- if(ispartofdisk(&state.regions[i], &state.regions[index]))
- return 1;
- return 0;
-}
-
-static size_t
-totalsize(void)
-{
- size_t size = 0;
- for(size_t i = 0; i < state.regioncount; i += 1)
- size += state.regions[i].size;
- return size;
-}
-
-static void
-dump(size_t *indices)
-{
- size_t j = 0;
- printf("\tTotal size: %.2fMiB\n", (double)totalsize() / 1024 / 1024);
- for(size_t i = 0; i < state.regioncount; i += 1){
- if(indices == NULL || indices[j] == i){
- printf("\t\tRegion %s", state.regions[i].path);
- if(state.regions[i].offset > 0)
- printf(":%luMiB", state.regions[i].offset/1024/1024);
- printf(", uuid %s, size %.2f MiB, %uMiB/s seqread, %uus latency, %s, %d redundancy, %s, %s\n", state.regions[i].uuid, (double)state.regions[i].size/1024/1024, state.regions[i].seqspeed, state.regions[i].randlatency, types[state.regions[i].type], state.regions[i].redundancy, uses[state.regions[i].use], state.regions[i].why != NULL ? state.regions[i].why : "CAUSE UNKNOWN");
- j += 1;
- }
- }
-}
-
-static size_t
-roffset(Region *r, int random, size_t alignment, size_t size)
-{
- size_t offset;
- if(random)
- offset = rand() % r->size;
- else
- offset = lseek(r->fd, size, SEEK_CUR) - size;
- roundup(&offset, alignment);
- return offset;
-}
-
-static void
-proberegionspeed(Region *r, int random)
-{
- long count = 0, ocount;
- char *buf = aligned_alloc(512, 0x2000);
- size_t offset;
- double t;
- if(buf == NULL)
- /* Continue without determining speed */
- return;
- delta();
- for(t = 0; t < 0.1; t += delta()){
- offset = roffset(r, random, random ? 512 : 4096, random ? 512 : 4096);
- ocount = pread(r->fd, buf, random ? 512 : 4096, offset);
- if(ocount < 0 && count == 0){
- fprintf(stderr, "Unable to probe %s read speed of '%s'\n", random ? "random" : "sequential", r->path);
- break;
- }
- count += ocount;
- }
- if(random)
- r->randlatency = (t * 1000 * 1000) / (count / 512);
- else
- r->seqspeed = count / (t * 1000 * 1000);
- free(buf);
-}
-
-static void
-proberegion(Region *r)
-{
- printf("\tProbing '%s'...\n", r->path);
- proberegiontype(r);
- proberegionspeed(r, 0);
- proberegionspeed(r, 1);
- proberegionredundancy(r);
-}
-
-static void
-proberegions(void)
-{
- if(state.regioncount == 0)
- sysfatal("No usable disks provided!");
- printf("Probing disks...\n");
- for(size_t i = 0; i < state.regioncount; i += 1)
- proberegion(&state.regions[i]);
-}
-
-static void
-reduceregion(size_t r, size_t size)
-{
- /* If the region happens to be exactly the size intended, don't split it */
- if(state.regions[r].size == size)
- return;
- Region *newregions = calloc(sizeof(Region), state.regioncount + 1);
- if(newregions == NULL)
- sysfatal("Insufficient memory");
- memcpy(newregions, state.regions, sizeof(Region) * state.regioncount);
- free(state.regions);
- state.regions = newregions;
- state.regions[state.regioncount] = state.regions[r];
- state.regions[r].size = size;
- state.regions[state.regioncount].offset += size;
- state.regions[state.regioncount].size -= size;
- state.regions[state.regioncount].use = undecided;
- state.regioncount += 1;
-}
-
-static void
-decidebloom(void)
-{
- state.usebloom = totalsize() > ((size_t)32 * 1024 * 1024 * 1024);
- printf("Bloom filter: %s...\n", state.usebloom ? "enabled" : "disabled");
-}
-
-static void
-decidesizes(void)
-{
- size_t total = totalsize() - (state.usebloom ? 512 * 1024 * 1024 : 0);
- state.fossilsize = totalsize() / 100;
- if(state.fossilcount > 0 && state.fossilsize < 512 * 1024 * 1024)
- sysfatal("Not enough space for state.fossilcount!");
- total -= state.fossilcount * state.fossilsize;
- state.indexsize = total * 15 / 105;
- roundup(&state.indexsize, 512*1024*1024);
- state.arenasize = total - state.indexsize;
-}
-
-/* if minsize == -1, finds the largest unused region
- * Otherwise, picks a random unused region >= minsize */
-static size_t
-findunusedregion(size_t minsize)
-{
- size_t largest = -1, largestsize = 0;
- for(size_t i = 0; i < state.regioncount; i += 1){
- if(state.regions[i].use != undecided)
- continue;
- if(minsize != (size_t)-1 && state.regions[i].size >= minsize)
- return i;
- if(minsize == (size_t)-1 && state.regions[i].size > largestsize){
- largest = i;
- largestsize = state.regions[i].size;
- }
- }
- return largest;
-}
-
-static void
-picklargest(size_t minsize, int use)
-{
- size_t i = findunusedregion(minsize);
- if(i == (size_t)-1){
- dump(NULL);
- sysfatal("no region big enough for %s partition, needed size %ldMiB", uses[use], minsize/1024/1024);
- }
- reduceregion(i, minsize);
- state.regions[i].why = "it was the first region found which was sufficiently large";
- state.regions[i].use = use;
-}
-
-/* As picklargest, but will split among regions as needed */
-static void
-scatterpicks(size_t minsize, int use)
-{
- size_t i = findunusedregion(minsize);
- if(i == (size_t)-1){
- dump(NULL);
- sysfatal("no region big enough for %s partition, needed size %ldMiB", uses[use], minsize/1024/1024);
- }
- reduceregion(i, minsize);
- state.regions[i].why = "it was the first region found which was sufficiently large";
- state.regions[i].use = use;
-}
-
-static void
-plan(void)
-{
- printf("Planning...\n");
- decidebloom();
- decidesizes();
- scatterpicks(state.arenasize, arena);
- scatterpicks(state.indexsize, index);
- if(state.usebloom)
- picklargest(512*1024*1024, bloom);
- for(size_t f = 0; f < state.fossilcount; f += 1)
- picklargest(state.fossilsize, fossil);
-}
-
-static void
-partition(void)
-{
- printf("Partitioning...\n");
- sysfatal("TODO");
-}
-
-static void
-format(void)
-{
- printf("Formatting...\n");
- sysfatal("TODO");
-}
-
-static void
-execute(void)
-{
- partition();
- format();
-}
-
-static void
-verify(void)
-{
- char buf[3];
- printf("Plan:\n");
- dump(NULL);
- printf("Proceed? ");
- fflush(stdout);
- if(fgets(buf, 3, stdin) == NULL || (buf[0] != 'y' && buf[0] != 'Y'))
- sysfatal("Aborting!");
-}
-
-int
-main(int argc, char **argv)
-{
- parseargs(argc, argv);
- opendisks();
- /* don't probe bad regions; however, without probing, some duplicates cannot be detected */
- /* so, first remove bad regions; then probe; then do another check for duplicates */
- removeregion(isclosed, "not a valid disk");
- removeregion(isunused, "useless");
- removeregion(isduplicate, "duplicates");
- removeregion(istoosmall, "too small");
- proberegions();
- removeregion(issubregion, "a subregion of another, known region");
- plan();
- verify();
- execute();
- return 0;
-}
--- a/partitioner/partitioner.h
+++ /dev/null
@@ -1,26 +1,0 @@
-typedef struct {
- char *path;
- int fd;
- size_t offset, size;
- enum { unknown, bad, disk, part, file } type;
- enum { undecided, none, arena, index, bloom, indexbloom, fossil } use;
- /* Random read latency; unit: average microseconds to read one 512-byte block */
- uint32_t randlatency;
- /* Sequential read speed; unit: MiB/s when reading 4K blocks */
- uint32_t seqspeed;
- /* The number of drives that must fail to bring down this region */
- uint16_t redundancy;
- /* The reason the region is being used the way it is */
- char *why;
- /* device-specific unique identifier */
- char *uuid;
-} Region;
-
-enum{
- /* Minimum size for a partition to be considered for usage */
- MinSize = 16 * 1024 * 1024,
-};
-
-void proberegiontype(Region *r);
-void proberegionredundancy(Region *r);
-