shithub: sl

ref: a70379d7e4b822f532fb0a8ccdd1624a90b64a68
dir: /src/ios.h/

View raw version
// Buffering mode: decides when buffered data is actually pushed out to
// the underlying I/O channel. In-memory io (bm_mem) is the special case
// in which the data never leaves the buffer.
typedef enum {
	bm_none = 0,
	bm_line = 1,
	bm_block = 2,
	bm_mem = 3,
} ios_bm;

// Buffer state: records where the underlying file position sits relative
// to the buffer (reading: at its end, writing: at its beginning).
typedef enum {
	bst_none = 0,
	bst_rd = 1,
	bst_wr = 2,
	bst_closed = 3,
} ios_bst;

// length of the inline `local` array embedded in every sl_ios
#define IOS_INLSIZE 128
// NOTE(review): presumably the default size of a separately allocated
// stream buffer - not used in this header, confirm in ios.c
#define IOS_BUFSIZE 32768

// Source location attached to a stream (see the `loc` member of sl_ios).
// NOTE(review): whether lineno/colno are 0- or 1-based, and who owns
// filename, is not visible here - confirm in ios.c.
typedef struct {
	char *filename; // name of the file being processed
	u32int lineno; // line number
	u32int colno; // column number
}sl_loc;

// State of one buffered I/O stream. The ios_* functions declared in this
// header operate on a pointer to this structure.
typedef struct {
	u8int *buf; // start of buffer
	usize maxsize; // space allocated to buffer
	usize size; // length of valid data in buf, >=ndirty
	usize bpos; // current position in buffer
	usize ndirty; // # bytes at &buf[0] that need to be written
	soffset fpos; // cached file pos
	sl_loc loc; // source location (filename/line/column) for this stream
	ios_bm bm; // buffering mode
	int colnowait; // NOTE(review): purpose not evident from this header - confirm in ios.c

	// the state only indicates where the underlying file position is relative
	// to the buffer. reading: at the end. writing: at the beginning.
	// in general, you can do any operation in any state.
	ios_bst state;

	int fd; // descriptor of the underlying I/O channel

	bool readonly;
	bool writeonly;
	bool ownbuf; // NOTE(review): presumably "buf is released by the stream" - confirm
	bool ownfd; // NOTE(review): presumably "fd is closed by the stream" - confirm
	bool _eof; // NOTE(review): presumably the end-of-input flag reported by ios_eof() - confirm

	// this means you can read, seek back, then read the same data
	// again any number of times. usually only true for files and strings.
	bool rereadable;

	// inline storage of IOS_INLSIZE bytes.
	// NOTE(review): presumably serves as the initial buffer so that small
	// streams need no separate allocation - confirm in ios.c
	u8int local[IOS_INLSIZE];
}sl_ios;

// NOTE(review): by its name and signature this mirrors GNU memrchr(3),
// i.e. a search for the last occurrence of byte c within the first n
// bytes of s - confirm against the definition.
void *llt_memrchr(const void *s, int c, usize n) sl_purefn;

/* low-level interface functions */
// NOTE(review): return-value and error conventions are not visible in this
// header; the usize results below are presumably byte counts - confirm in ios.c.
usize ios_read(sl_ios *s, void *dest, usize n);  // read up to n bytes from s into dest
usize ios_write(sl_ios *s, const void *data, usize n);  // write n bytes from data to s
soffset ios_seek(sl_ios *s, soffset pos);   // absolute seek
soffset ios_seek_end(sl_ios *s);  // seek to the end of the stream
soffset ios_skip(sl_ios *s, soffset offs);  // relative seek
soffset ios_pos(sl_ios *s);  // get current position
int ios_trunc(sl_ios *s, soffset size);  // truncate the stream to size
bool ios_eof(sl_ios *s) sl_purefn;  // test for end of input
int ios_flush(sl_ios *s);  // push buffered output to the underlying I/O channel
void ios_close(sl_ios *s);
void ios_free(sl_ios *s);
u8int *ios_takebuf(sl_ios *s, usize *psize);  // null-terminate and release buffer to caller
// set buffer space to use
// (`own` presumably transfers responsibility for buf to the stream - confirm)
int ios_setbuf(sl_ios *s, u8int *buf, usize size, bool own);
int ios_bufmode(sl_ios *s, ios_bm mode);  // select the buffering mode
void ios_set_readonly(sl_ios *s);
// stream-to-stream copies from `from` into `to`: a byte count, everything,
// or up to a delimiter byte
// (NOTE(review): whether the delimiter itself is copied is not visible here)
usize ios_copy(sl_ios *to, sl_ios *from, usize nbytes);
usize ios_copyall(sl_ios *to, sl_ios *from);
usize ios_copyuntil(sl_ios *to, sl_ios *from, u8int delim);

/* io creation */
// Each constructor takes caller-provided storage `s` and returns an sl_ios
// pointer. NOTE(review): presumably `s` on success and a null pointer on
// failure - confirm in ios.c.
// NOTE(review): ios_file takes `char *fname` while ios_fd takes
// `const char *name`; check whether fname could be const-qualified too.
sl_ios *ios_file(sl_ios *s, char *fname, bool rd, bool wr, bool create, bool trunc);
sl_ios *ios_mem(sl_ios *s, usize initsize);  // in-memory stream with an initial size
sl_ios *ios_static_buffer(sl_ios *s, const u8int *buf, usize sz);  // stream over an existing buffer of sz bytes
sl_ios *ios_fd(sl_ios *s, int fd, const char *name, bool isfile, bool own);  // stream over an existing descriptor

// streams for standard input, output and error; defined outside this header
extern sl_ios *ios_stdin;
extern sl_ios *ios_stdout;
extern sl_ios *ios_stderr;

// NOTE(review): presumably sets up the three streams above and must be
// called before they are used - confirm in ios.c
void ios_init_std(void);

/* high-level io functions - output */
int ios_putrune(sl_ios *s, Rune r);  // write one Rune to s
// printf-style formatted output to s.
// NOTE(review): sl_printfmt(fmt, args) presumably expands to a compiler
// format-checking attribute (format string is argument 2) - confirm.
int ios_printf(sl_ios *s, const char *format, ...) sl_printfmt(2, 3);
int ios_vprintf(sl_ios *s, const char *format, va_list args) sl_printfmt(2, 0);

/* high-level io functions - input */
// Rune-level input; the rune is stored through r.
// NOTE(review): presumably peek leaves the rune in the stream while get
// consumes it; the meaning of the int result is not visible here - confirm.
int ios_peekrune(sl_ios *s, Rune *r);
int ios_getrune(sl_ios *s, Rune *r);

// discard data buffered for reading
void ios_purge(sl_ios *s);

/* stdio-style functions */
// NOTE(review): presumably the value ios_getc/ios_peekc return at end of
// input, mirroring stdio's EOF - confirm in ios.c
#define IOS_EOF (-1)
int ios_putc(sl_ios *s, int c);  // write the single character c
int ios_getc(sl_ios *s);  // read one character
int ios_peekc(sl_ios *s);  // look at the next character
// Write the NUL-terminated string str to s, terminator excluded.
// Expands to a call of ios_write(), so it yields ios_write()'s result.
// Caution: str is evaluated twice - do not pass an expression with side effects.
#define ios_puts(s, str) ios_write((s), (str), strlen((str)))

/*
  With in-memory io, mixed reads and writes are equivalent to performing
  sequences of *p++, as either an lvalue or rvalue. File ios behave
  similarly, but other ios might not support this. Using unbuffered
  mode makes this more predictable.

  Note on "unget" functions:
  There are two kinds of functions here: those that operate on sized
  blocks of bytes and those that operate on logical units like "character"
  or "integer". The "unget" functions only work on logical units. There
  is no "unget n bytes". You can only do an unget after a matching get.
  However, data pushed back by an unget is available to all read operations.
  The reason for this is that unget is defined in terms of its effect on
  the underlying buffer (namely, it rebuffers data as if it had been
  buffered but not read yet). IOS reserves the right to perform large block
  operations directly, bypassing the buffer. In such a case data was
  never buffered, so "rebuffering" has no meaning (i.e. there is no
  correspondence between the buffer and the physical io).

  Single-bit I/O is able to write partial bytes ONLY IF the io supports
  seeking. Also, line buffering is not well-defined in the context of
  single-bit I/O, so it might not do what you expect.

  implementation notes:
  in order to know where we are in a file, we must ensure the buffer
  is only populated from the underlying io starting with p==buf.

  to switch from writing to reading: flush, set p=buf, cnt=0
  to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0

  when writing: buf starts at curr. physical io pos, p - buf is how
  many bytes we've written logically. cnt==0

  dirty == (bitpos>0 && state==bst_wr), EXCEPT right after switching from
  reading to writing, where we might be in the middle of a byte without
  having changed it.

  to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
  seek back by the same amount (undo it). write onto those bits. now set
  the dirty bit. in this state, we can bit-read up to the end of the byte,
  then formally switch to the read state using flush.

  design points:
  - data-source independence, including in-memory io
  - expose buffer to user, allow user-owned buffers
  - allow direct I/O, don't always go through buffer
  - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
	and makes it possible for sockets where it otherwise wouldn't be
  - tries to allow switching between reading and writing
  - support 64-bit and large files
  - efficient, low-latency buffering
  - special support for utf8
  - type-aware functions with byte-order swapping service
  - position counter for meaningful data offsets with sockets

  theory of operation:

  the buffer is a view of part of a file/stream. you can seek, read, and
  write around in it as much as you like, as if it were just a string.

  we keep track of the part of the buffer that's invalid (written to).
  we remember whether the position of the underlying io is aligned
  with the end of the buffer (reading mode) or the beginning (writing mode).

  based on this info, we might have to seek back before doing a flush.

  as optimizations, we do no writing if the buffer isn't "dirty", and we
  do no reading if the data will only be overwritten.
*/