Commit b4af45cd authored by Eddie Kohler

Prepare for journal.

- Allocate a 64-block journal in chickadeefs.img.
- Correctly set `superblock::njournal`.
- Correct replay.
- Replay becomes more verbose.
- Add --help to chickadeefsck/mkchickadeefs.
parent 6637b100
......@@ -218,7 +218,7 @@ chickadeeboot.img: $(OBJDIR)/mkchickadeefs $(OBJDIR)/bootsector $(OBJDIR)/kernel
chickadeefs.img: $(OBJDIR)/mkchickadeefs \
$(OBJDIR)/bootsector $(OBJDIR)/kernel $(DISKFS_CONTENTS) \
$(DISKFS_BUILDSTAMP)
$(call run,$(OBJDIR)/mkchickadeefs -b 32768 -f 16 -s $(OBJDIR)/bootsector $(OBJDIR)/kernel $(DISKFS_CONTENTS) > $@,CREATE $@)
$(call run,$(OBJDIR)/mkchickadeefs -b 32768 -f 16 -j 64 -s $(OBJDIR)/bootsector $(OBJDIR)/kernel $(DISKFS_CONTENTS) > $@,CREATE $@)
cleanfs:
$(call run,rm -f chickadeefs.img,RM chickadeefs.img)
......
......@@ -4,6 +4,7 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <getopt.h>
#include <inttypes.h>
#include <vector>
#include <deque>
......@@ -235,7 +236,7 @@ void inodeinfo::finish_visit() {
void inodeinfo::visit_data(blocknum_t b, size_t idx, size_t sz) {
if (b != 0) {
if (verbose) {
ewprintf(" [%zu]: data block %u\n", idx, b);
printf(" [%zu]: data block %u\n", idx, b);
}
if (idx * blocksize >= sz) {
ewprintf("inode %u @%s [%zu]: warning: dangling block reference\n",
......@@ -311,7 +312,7 @@ void inodeinfo::visit_directory_data(blocknum_t b, size_t pos, size_t sz) {
void inodeinfo::visit_indirect(blocknum_t b, size_t idx, size_t sz) {
if (b != 0) {
if (verbose) {
ewprintf(" [%zu]: indirect block %u\n", idx, b);
printf(" [%zu]: indirect block %u\n", idx, b);
}
if (idx * blocksize >= sz) {
ewprintf("inode %u @%s [%zu]: warning: dangling indirect block reference\n",
......@@ -341,7 +342,7 @@ void inodeinfo::visit_indirect(blocknum_t b, size_t idx, size_t sz) {
void inodeinfo::visit_indirect2(blocknum_t b, size_t idx, size_t sz) {
if (b != 0) {
if (verbose) {
ewprintf(" [%zu]: indirect2 block %u\n", idx, b);
printf(" [%zu]: indirect2 block %u\n", idx, b);
}
if (idx * blocksize >= sz) {
ewprintf("inode %u @%s [%zu]: warning: dangling indirect2 block reference\n",
......@@ -376,8 +377,9 @@ struct ujournalreplayer : public chickadeefs::journalreplayer {
ujournalreplayer(unsigned char* disk);
void error(unsigned bi, const char* text) override;
void write_block(unsigned bn, unsigned char* buf) override;
void message(unsigned bi, const char* format, ...) override;
void error(unsigned bi, const char* format, ...) override;
void write_block(uint16_t tid, unsigned bn, unsigned char* buf) override;
void write_replay_complete() override;
};
......@@ -385,12 +387,37 @@ ujournalreplayer::ujournalreplayer(unsigned char* disk) {
disk_ = disk;
}
void ujournalreplayer::error(unsigned bi, const char* text) {
eprintf("journal block %u/%u: %s\n", bi, sb.nblocks - sb.journal_bn,
text);
// Progress-message callback for journal replay.
//
// Does nothing unless `verbose` is set. Otherwise prints one line to standard
// output: the prefix "journal: ", then "block BI/NJOURNAL: " unless `bi` is
// `-1U` (used for messages not tied to a journal block), then the
// printf-style `format` expanded with the remaining arguments.
void ujournalreplayer::message(unsigned bi, const char* format, ...) {
    if (!verbose) {
        return;
    }

    printf("journal: ");
    if (bi != -1U) {
        printf("block %u/%u: ", bi, sb.njournal);
    }

    va_list args;
    va_start(args, format);
    vprintf(format, args);
    va_end(args);

    printf("\n");
}
// Error callback for journal replay.
//
// Always prints one line to standard output: the prefix "journal: ", then
// "block BI/NJOURNAL: " unless `bi` is `-1U`, then the printf-style `format`
// expanded with the remaining arguments. Each call also increments `nerrors`.
void ujournalreplayer::error(unsigned bi, const char* format, ...) {
    const bool has_block = bi != -1U;

    printf("journal: ");
    if (has_block) {
        printf("block %u/%u: ", bi, sb.njournal);
    }

    va_list args;
    va_start(args, format);
    vprintf(format, args);
    va_end(args);
    printf("\n");

    ++nerrors;
}
void ujournalreplayer::write_block(unsigned bn, unsigned char* buf) {
// Replay callback: copy one block's worth of data from `buf` over block
// number `bn` of the in-memory disk image `disk_`. When `verbose` is set,
// first report which transaction (`tid`) the block is replayed for.
// NOTE(review): `bn` is not range-checked here — confirm the replayer
// validates block numbers before calling.
void ujournalreplayer::write_block(uint16_t tid, unsigned bn, unsigned char* buf) {
    if (verbose) {
        printf("journal transaction %u: replaying block %u\n", tid, bn);
    }
    unsigned char* destination = disk_ + bn * blocksize;
    memcpy(destination, buf, blocksize);
}
......@@ -403,9 +430,9 @@ void ujournalreplayer::write_replay_complete() {
static void replay_journal() {
// copy journal
size_t jsz = (sb.nblocks - sb.journal_bn) * blocksize;
size_t jsz = sb.njournal * blocksize;
unsigned char* jcopy = new unsigned char[jsz];
memcpy(jcopy, data + sb.nblocks * blocksize, jsz);
memcpy(jcopy, data + sb.journal_bn * blocksize, jsz);
// replay it
ujournalreplayer ujr(data);
......@@ -415,28 +442,56 @@ static void replay_journal() {
}
static void usage() {
fprintf(stderr, "Usage: chickadeefsck [-V] [-r] [IMAGE]\n");
exit(1);
// Print the one-line command synopsis to standard error and terminate the
// process with exit status 2. Never returns.
static void __attribute__((noreturn)) usage() {
    static const char synopsis[] =
        "Usage: chickadeefsck [-V] [-s | --no-journal] [IMAGE]\n";
    fputs(synopsis, stderr);
    exit(2);
}
// Print the full help text (synopsis plus one line per long option) to
// standard output and terminate the process with exit status 0.
// Never returns.
static void __attribute__((noreturn)) help() {
    // Continuation lines start in column 0 so that no indentation leaks
    // into the emitted text.
    static const char text[] = "Usage: chickadeefsck [-V] [-s | --no-journal] [IMAGE]\n\
Check the ChickadeeFS IMAGE for errors and exit with a status code\n\
indicating success.\n\
\n\
--verbose, -V print information about IMAGE\n\
--save-journal, -s replay journal into IMAGE\n\
--no-journal do not replay journal before checking image\n\
--help display this help and exit\n";
    fputs(text, stdout);
    exit(0);
}
// Long-option table passed to `getopt_long` in `main`. Each entry maps a
// long option name to the code that `getopt_long` returns for it; none of
// the options takes an argument.
//
// "save" and "save-journal" are aliases that both yield 's'. The codes 'x'
// (--no-journal) and 'h' (--help) are absent from the short-option string
// "Vs", so those two options are reachable only in their long form.
// The all-zero entry terminates the table.
static struct option options[] = {
{ "verbose", no_argument, nullptr, 'V' },
{ "save", no_argument, nullptr, 's' },
{ "save-journal", no_argument, nullptr, 's' },
{ "no-journal", no_argument, nullptr, 'x' },
{ "help", no_argument, nullptr, 'h' },
{ nullptr, 0, nullptr, 0 }
};
int main(int argc, char** argv) {
bool replay = false;
bool no_journal = false;
int opt;
while ((opt = getopt(argc, argv, "Vr")) != -1) {
while ((opt = getopt_long(argc, argv, "Vs", options, nullptr)) != -1) {
switch (opt) {
case 'V':
verbose = true;
break;
case 'r':
case 's':
replay = true;
break;
case 'x':
no_journal = true;
break;
case 'h':
help();
default:
usage();
}
}
if (optind != argc && optind + 1 != argc) {
if ((optind != argc && optind + 1 != argc)
|| (replay && no_journal)) {
usage();
}
......@@ -445,12 +500,16 @@ int main(int argc, char** argv) {
int fd = STDIN_FILENO;
if (optind + 1 == argc && strcmp(argv[optind], "-") != 0) {
filename = argv[optind];
fd = open(filename, O_RDONLY);
fd = open(filename, replay ? O_RDWR : O_RDONLY);
if (fd == -1) {
fprintf(stderr, "%s: %s\n", filename, strerror(errno));
exit(1);
exit(2);
}
}
if (isatty(fd)) {
fprintf(stderr, "%s: Is a terminal\n", filename);
usage();
}
struct stat s;
int r = fstat(fd, &s);
......@@ -465,8 +524,9 @@ int main(int argc, char** argv) {
}
if (data == reinterpret_cast<unsigned char*>(MAP_FAILED)) {
if (replay) {
fprintf(stderr, "can't modify file to replay journal\n");
exit(1);
fprintf(stderr, "%s: %s (cannot save journal)\n",
filename, strerror(errno));
exit(2);
}
size = 0;
size_t capacity = 16384;
......@@ -484,7 +544,7 @@ int main(int argc, char** argv) {
break;
} else if (r == -1 && errno != EAGAIN) {
fprintf(stderr, "%s: %s\n", filename, strerror(errno));
exit(1);
exit(2);
} else if (r > 0) {
size += r;
}
......@@ -560,7 +620,7 @@ int main(int argc, char** argv) {
fbb = data + sb.fbb_bn * blocksize;
// check journal
if (sb.journal_bn < sb.nblocks) {
if (sb.journal_bn < sb.nblocks && !no_journal) {
replay_journal();
}
......
......@@ -13,6 +13,7 @@
#include <ctype.h>
#include <errno.h>
#include <assert.h>
#include <getopt.h>
#include <vector>
#include <random>
#include <algorithm>
......@@ -207,6 +208,35 @@ static void parse_uint32(const char* arg, uint32_t* val, int opt) {
*val = n;
}
// Long-option table for `getopt_long`. Each entry maps a long option name to
// the option code that `getopt_long` returns for it; entries marked
// `required_argument` consume the following value.
//
// "randomize" is an alias for "random": the help text documents the option
// as `--randomize`, and `getopt_long` only accepts abbreviations of a table
// name, never extensions of it, so without the alias the documented spelling
// is rejected as an unknown option. Both entries share the same
// has_arg/flag/val.
// The all-zero entry terminates the table.
static struct option options[] = {
    { "blocks", required_argument, nullptr, 'b' },
    { "inodes", required_argument, nullptr, 'i' },
    { "swap", required_argument, nullptr, 'w' },
    { "journal", required_argument, nullptr, 'j' },
    { "first-data", required_argument, nullptr, 'f' },
    { "random", no_argument, nullptr, 'r' },
    { "randomize", no_argument, nullptr, 'r' },
    { "bootsector", required_argument, nullptr, 's' },
    { "output", required_argument, nullptr, 'o' },
    { "help", no_argument, nullptr, 'h' },
    { nullptr, 0, nullptr, 0 }
};
// Print the full help text (synopsis plus one line per option) to standard
// output and terminate the process with exit status 0. Never returns.
static void __attribute__((noreturn)) help() {
    // Continuation lines start in column 0 so that no indentation leaks
    // into the emitted text.
    static const char text[] = "Usage: mkchickadeefs [OPTS] [-o IMAGE] FILE...\n\
Create a ChickadeeFS image from the arguments.\n\
\n\
--blocks, -b N allocate N blocks (default 1024)\n\
--inodes, -i N allocate N inodes\n\
--swap, -w N allocate N blocks for swap space\n\
--journal, -j N allocate N blocks for journal\n\
--first-data, -f B allocate first file sequentially starting at block B\n\
--bootsector, -s FILE read FILE into the boot sector\n\
--randomize scramble block order before writing\n\
--output, -o IMAGE write output to IMAGE\n\
--help print this message and exit\n";
    fputs(text, stdout);
    exit(0);
}
int main(int argc, char** argv) {
uint32_t first_datab = 0;
const char* bootsector = nullptr;
......@@ -248,6 +278,8 @@ int main(int argc, char** argv) {
}
outfile = optarg;
break;
case 'h':
help();
default:
fprintf(stderr, "unknown argument\n");
exit(1);
......@@ -315,6 +347,7 @@ int main(int argc, char** argv) {
sb2.nblocks = to_le(sb.nblocks);
sb2.nswap = to_le(sb.nswap);
sb2.ninodes = to_le(sb.ninodes);
sb2.njournal = to_le(sb.njournal);
sb2.swap_bn = to_le(sb.swap_bn);
sb2.fbb_bn = to_le(sb.fbb_bn);
sb2.inode_bn = to_le(sb.inode_bn);
......
......@@ -107,11 +107,25 @@ struct dirent {
};
// Transaction IDs are 16 bits wide; `tiddiff_t` is the signed type of the
// same width used to hold the difference between two IDs.
using tid_t = uint16_t;
using tiddiff_t = int16_t;

// Ordering predicates on transaction IDs. Two IDs are ordered by the sign of
// their difference after it is narrowed to `tiddiff_t`, so the comparison is
// relative rather than absolute: an ID just below the 16-bit limit compares
// less than an ID just above zero.

// True iff `x` comes strictly before `y`.
inline bool tid_lt(tid_t x, tid_t y) {
    return static_cast<tiddiff_t>(x - y) < 0;
}
// True iff `x` comes strictly after `y`.
inline bool tid_gt(tid_t x, tid_t y) {
    return static_cast<tiddiff_t>(x - y) > 0;
}
// True iff `x` does not come after `y`.
inline bool tid_le(tid_t x, tid_t y) {
    return !tid_gt(x, y);
}
// True iff `x` does not come before `y`.
inline bool tid_ge(tid_t x, tid_t y) {
    return !tid_lt(x, y);
}
// Magic number identifying a journal metablock; the replayer compares
// `jmetablock::magic` against this value when classifying journal blocks.
static constexpr uint64_t journalmagic = 0xFBBFBB009EEBCEEDUL;
// NOTE(review): presumably the checksum value that stands for "no checksum
// computed" — confirm against the code that verifies `jmetablock::checksum`.
static constexpr uint32_t nochecksum = 0x82600A5F;
// NOTE(review): presumably the number of `jblockref` entries that fit in one
// `jmetablock` — confirm against the struct layout.
static constexpr size_t ref_size = (nindirect - 7) / 3;
typedef uint16_t tid_t;
typedef int16_t tiddiff_t;
struct jblockref { // component of `jmetablock`
blocknum_t bn; // destination block number
......@@ -133,12 +147,15 @@ struct jmetablock {
inline bool is_valid_meta() const;
};
enum {
// jmetablock::flags bits
jf_meta = 0x01, // this block is a metablock (mandatory)
jf_error = 0x02,
jf_corrupt = 0x04,
jf_start = 0x10, // metablock starts a txn
jf_commit = 0x20, // metablock commits this txn
jf_complete = 0x40, // metablock marks this txn as complete
jf_start = 0x10, // metablock starts transaction `tid`
jf_commit = 0x20, // metablock commits `tid` (optional)
jf_complete = 0x40, // metablock marks `tid` complete (optional)
// jblockref::bflags bits
jbf_escaped = 0x100, // refblock is escaped in journal
jbf_nonjournaled = 0x200, // refblock is no longer journaled
jbf_overwritten = 0x400 // refblock overwritten in later txn
......@@ -166,10 +183,12 @@ struct journalreplayer {
// The following are callbacks called by `run()`.
// Report a progress message at journal block index `bi`.
virtual void message(unsigned bi, const char* format, ...);
// Report an error at journal block index `bi`.
virtual void error(unsigned bi, const char* text);
// Write the data in `buf` to block number `bn`.
virtual void write_block(blocknum_t bn, unsigned char* buf);
virtual void error(unsigned bi, const char* format, ...);
// Write the data in `buf` to block number `bn` (txn was `tid`).
virtual void write_block(tid_t tid, blocknum_t bn, unsigned char* buf);
// Called at the end of `run()`.
virtual void write_replay_complete();
......
......@@ -42,7 +42,10 @@ bool journalreplayer::analyze(unsigned char* jd, unsigned nblocks) {
for (unsigned bi = 0; bi != nb_; ++bi) {
analyze_block(bi);
}
if (!ok_ || !nmr_) {
if (!ok_) {
return false;
} else if (!nmr_) {
message(-1U, "no metablocks found");
return false;
}
......@@ -61,37 +64,37 @@ bool journalreplayer::analyze(unsigned char* jd, unsigned nblocks) {
error(mr_[mi].bi, "duplicate journal seqno");
ok_ = false;
}
if (tiddiff_t(cur->commit_boundary - last->commit_boundary) < 0) {
if (tid_lt(cur->commit_boundary, last->commit_boundary)) {
error(mr_[mi].bi, "journal commit_boundary backtracked");
ok_ = false;
}
if (tiddiff_t(cur->complete_boundary - last->complete_boundary) < 0) {
if (tid_lt(cur->complete_boundary, last->complete_boundary)) {
error(mr_[mi].bi, "journal complete_boundary backtracked");
ok_ = false;
}
}
if (tiddiff_t(cur->complete_boundary - cur->commit_boundary) > 0) {
if (tid_gt(cur->complete_boundary, cur->commit_boundary)) {
error(mr_[mi].bi, "journal complete_boundary above commit_boundary");
ok_ = false;
}
if (cur->nref
&& mi > 0
&& tiddiff_t(cur->tid - mr_[mi - 1].b->commit_boundary) < 0) {
&& tid_lt(cur->tid, mr_[mi - 1].b->commit_boundary)) {
error(mr_[mi].bi, "journal adds data to a committed transaction");
ok_ = false;
}
if (cur->nref
&& tiddiff_t(cur->tid - mr_[mi].b->complete_boundary) < 0) {
&& tid_lt(cur->tid, mr_[mi].b->complete_boundary)) {
error(mr_[mi].bi, "journal adds data to a completed transaction");
ok_ = false;
}
if ((cur->flags & jf_complete)
&& tiddiff_t(cur->tid - cur->complete_boundary) >= 0) {
&& tid_ge(cur->tid, cur->complete_boundary)) {
error(mr_[mi].bi, "completed transaction above complete_boundary");
ok_ = false;
}
if ((cur->flags & jf_commit)
&& tiddiff_t(cur->tid - cur->commit_boundary) >= 0) {
&& tid_ge(cur->tid, cur->commit_boundary)) {
error(mr_[mi].bi, "committed transaction above commit_boundary");
ok_ = false;
}
......@@ -107,6 +110,8 @@ bool journalreplayer::analyze(unsigned char* jd, unsigned nblocks) {
// The last valid metablock has the relevant boundaries.
tid_t complete_boundary = mr_[nmr_ - 1].b->complete_boundary;
tid_t commit_boundary = mr_[nmr_ - 1].b->commit_boundary;
message(-1U, "committable region is [%u,%u)", complete_boundary,
commit_boundary);
for (tid_t tid = complete_boundary; tid != commit_boundary; ++tid) {
analyze_tid(tid);
}
......@@ -116,8 +121,8 @@ bool journalreplayer::analyze(unsigned char* jd, unsigned nblocks) {
// should be ignored.
for (unsigned mx = nmr_; mx != 0; --mx) {
auto jmb = mr_[mx - 1].b;
if (tiddiff_t(jmb->tid - complete_boundary) >= 0
&& tiddiff_t(jmb->tid - commit_boundary) < 0) {
if (tid_ge(jmb->tid, complete_boundary)
&& tid_lt(jmb->tid, commit_boundary)) {
analyze_overwritten_blocks(mx);
}
}
......@@ -142,8 +147,9 @@ bool journalreplayer::is_potential_metablock(const unsigned char* jd) {
void journalreplayer::analyze_block(unsigned bi) {
assert(bi < nb_);
auto jd = jd_ + bi * blocksize;
auto jmb = reinterpret_cast<jmetablock*>(jd);
if (is_potential_metablock(jd)) {
auto jmb = reinterpret_cast<jmetablock*>(jd);
message(bi, "found potential metablock");
jmb->seq = from_le(jmb->seq);
jmb->tid = from_le(jmb->tid);
jmb->commit_boundary = from_le(jmb->commit_boundary);
......@@ -184,6 +190,14 @@ void journalreplayer::analyze_block(unsigned bi) {
mr_[x].b = jmb;
++nmr_;
}
} else if (from_le(jmb->magic) != 0) {
if (from_le(jmb->magic) != journalmagic) {
message(bi, "not a metablock (magic number %" PRIx64 ")",
from_le(jmb->magic));
} else {
message(bi, "not a metablock (checksum %x)",
from_le(jmb->checksum));
}
}
}
......@@ -216,36 +230,52 @@ void journalreplayer::analyze_tid(tid_t tid) {
for (unsigned mi = 0; mi != nmr_; ++mi) {
auto jmb = mr_[mi].b;
if (flags != 0
&& !(flags & jf_commit)
&& jmb->seq != tid_t(mr_[mi - 1].b->seq + 1)) {
error(mr_[mi].bi, "missing seq number in committable region");
ok_ = false;
unsigned tid_flags = 0;
if (flags != 0) {
auto want_seq = tid_t(mr_[mi - 1].b->seq + 1);
if (jmb->seq == want_seq) { // no seq numbers missing
if (tid_gt(jmb->commit_boundary, tid)) {
tid_flags |= jf_commit;
}
if (tid_gt(jmb->complete_boundary, tid)) {
tid_flags |= jf_complete;
}
} else if (!(flags & jf_commit)) {
error(mr_[mi].bi, "tid %u: seq number %u missing", tid, want_seq);
ok_ = false;
}
}
if (jmb->tid == tid) {
if (!!(jmb->flags & jf_start) != (flags == 0)) {
error(mr_[mi].bi, "jf_start flag in improper place");
tid_flags = jmb->flags;
if (tid_gt(jmb->commit_boundary, tid)) {
tid_flags |= jf_commit;
}
if (tid_gt(jmb->complete_boundary, tid)) {
tid_flags |= jf_complete;
}
if (!!(tid_flags & jf_start) != (flags == 0)) {
error(mr_[mi].bi, "tid %u: jf_start flag in improper place", tid);
ok_ = false;
}
if ((flags & jf_commit)
&& jmb->nref != 0) {
error(mr_[mi].bi, "transaction continues after jf_commit");
error(mr_[mi].bi, "tid %u: continues after commit", tid);
ok_ = false;
}
if (flags & jf_complete) {
error(mr_[mi].bi, "transaction continues after jf_complete");
error(mr_[mi].bi, "tid %u: continues after complete", tid);
ok_ = false;
}
if (jmb->flags & jf_complete) {
error(mr_[mi].bi, "transaction completes below complete_boundary");
if (tid_flags & jf_complete) {
error(mr_[mi].bi, "tid %u: completes below complete_boundary", tid);
ok_ = false;
}
flags |= jmb->flags;
flags |= tid_flags;
}
}
if (!(flags & jf_commit)) {
error(0, "missing committed transaction in committable region");
error(-1U, "tid %u in committable region is not committed", tid);
ok_ = false;
}
}
......@@ -288,8 +318,8 @@ void journalreplayer::run() {
tid_t commit_boundary = mr_[nmr_ - 1].b->commit_boundary;
for (unsigned mi = 0; mi != nmr_; ++mi) {
auto jmb = mr_[mi].b;
if (tiddiff_t(jmb->tid - complete_boundary) >= 0
&& tiddiff_t(jmb->tid - commit_boundary) < 0) {
if (tid_ge(jmb->tid, complete_boundary)
&& tid_lt(jmb->tid, commit_boundary)) {
unsigned delta = 1;
for (unsigned refi = 0; refi != jmb->nref; ++refi) {
auto& ref = jmb->ref[refi];
......@@ -302,7 +332,7 @@ void journalreplayer::run() {
uint64_t magic = to_le(journalmagic);
memcpy(djd, &magic, sizeof(magic));
}
write_block(from_le(ref.bn), djd);
write_block(jmb->tid, from_le(ref.bn), djd);
}
if (!(bflags & jbf_nonjournaled)) {
++delta;
......@@ -316,10 +346,13 @@ void journalreplayer::run() {
// `run()` callbacks
void journalreplayer::error(unsigned, const char*) {
// Default `message` callback: discards the progress message. Subclasses
// override it to report progress.
//
// The first parameter is a journal block index and is spelled `unsigned`,
// exactly as in the in-class declaration and in the `error` callback, so the
// out-of-line definition does not rely on `blocknum_t` happening to be the
// same type as `unsigned`.
void journalreplayer::message(unsigned, const char*, ...) {
}
// Default `error` callback: ignores the report. Subclasses override it to
// surface errors found at a journal block index.
void journalreplayer::error(unsigned, const char*, ...) {
}
void journalreplayer::write_block(blocknum_t, unsigned char*) {
// Default `write_block` callback: does nothing, so no block is written.
// Subclasses override it to store the replayed data.
void journalreplayer::write_block(tid_t, blocknum_t, unsigned char*) {
}
void journalreplayer::write_replay_complete() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment