#include <err.h>
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* printd is a plain alias for printf. */
#define printd printf
/*
 * Prototype of a variadic function taking a format string, declared with
 * regparm(0). NOTE(review): no definition or call of foo() appears in the
 * code visible here -- confirm whether it is still needed.
 */
void foo(const char *fmt, ...) __attribute__((regparm(0)));


/*
 * machine_va_list is the per-architecture object that va_list_from_blob()
 * fills in and that is later handed to vfprintf() in place of a va_list.
 */
#ifdef __x86_64__
/* defined according to the x86_64 ABI spec */
struct my_va_list {
	uint32_t gp_offset;	/* offset to next available gpr in reg_save_area */
	uint32_t fp_offset;	/* offset to next available fpr in reg_save_area */
	void *overflow_arg_area;	/* args that are passed on the stack */
	struct reg_save_area *reg_save_area;		/* register args */
	/*
	 * NOT part of the ABI. ->overflow_arg_area gets advanced when code
	 * iterates over the arguments with va_arg(). That means we need to
	 * keep a copy in order to free the allocated memory (if any)
	 */
	void *overflow_arg_area_save;
} __attribute__((packed));

typedef struct my_va_list machine_va_list;

/*
 * Storage for the six integer argument registers, in the order the
 * code below fills them (8 bytes each, indexed through ->gp_offset).
 */
struct reg_save_area {
	uint64_t rdi, rsi, rdx, rcx, r8, r9;
	/* XMM registers follow, but we don't use them */
};
#elif __i386__
/*
 * On i386 the object is just a pointer; va_list_from_blob() stores a
 * heap buffer holding the arguments in it and va_list_cleanup() frees it.
 */
typedef void *machine_va_list;
#endif


/*
 * Classification of a single printf conversion, as reported by
 * conversion_size().
 */
enum argument_class {
	ARGCLASS_NONE,		/* consumes no argument ("%%") */
	ARGCLASS_INTEGER,	/* integer, character, pointer or string */
	ARGCLASS_FP,		/* floating point ("%f") */
	ARGCLASS_MEMORY,	/* never produced by conversion_size() */
	ARGCLASS_ERR,		/* the conversion could not be parsed */
};

/*
 * Parse the single printf conversion starting at fmt (which must point at
 * a '%') and report how many bytes of blob data its argument occupies.
 *
 * fmt:      start of the conversion; flags, a numeric field width and a
 *           numeric precision are skipped, then an optional length
 *           modifier (hh, h, l, ll, j, t, z) and the conversion character
 *           are interpreted.
 * argclass: receives the class of the argument; ARGCLASS_ERR on failure.
 *
 * Returns the argument size in bytes (0 for "%%"). On failure returns
 * (size_t)-1 if fmt does not start with '%', or (size_t)-2 (after printing
 * a diagnostic to stderr) if the conversion character is not recognised.
 */
static size_t
conversion_size(const char *fmt, enum argument_class *argclass)
{
	const char *p;
	size_t convsize, intsz;
	int long_mod = 0;	/* an 'l' or 'll' length modifier was seen */

	*argclass = ARGCLASS_ERR;
	if (fmt[0] != '%')
		return (size_t)-1;

	/* Eat flags. Notice this will accept duplicate flags. */
	p = fmt + 1;
	while (p[0] != '\0' && strchr("#0- +'", p[0]) != NULL)
		++p;
	/* Eat minimum field width, if any */
	while (isdigit((unsigned char)p[0]))
		++p;
	if (p[0] == '.')
		++p;
	/* Eat precision, if any */
	while (isdigit((unsigned char)p[0]))
		++p;

	/*
	 * Length modifier, if any. Each case steps p past the modifier so
	 * that p ends up on the conversion character; without a modifier
	 * integers default to int and p is already in place.
	 */
	intsz = sizeof(int);
	switch (p[0]) {
	case 'h':
		if (p[1] == 'h') {
			intsz = sizeof(char);
			p += 2;
		} else {
			intsz = sizeof(short);
			++p;
		}
		break;
	case 'l':
		long_mod = 1;
		if (p[1] == 'l') {
			intsz = sizeof(long long);
			p += 2;
		} else {
			intsz = sizeof(long);
			++p;
		}
		break;
	case 'j':
		intsz = sizeof(intmax_t);
		++p;
		break;
	case 't':
		intsz = sizeof(ptrdiff_t);
		++p;
		break;
	case 'z':
		intsz = sizeof(size_t);
		++p;
		break;
	default:
		break;
	}

	switch (p[0]) {
	case 'c':
		/* for %c, we only store 1 byte in the ktr entry */
		convsize = sizeof(char);
		*argclass = ARGCLASS_INTEGER;
		break;
	case 'd':
	case 'i':
	case 'o':
	case 'u':
	case 'x':
	case 'X':
		convsize = intsz;
		*argclass = ARGCLASS_INTEGER;
		break;
	case 'p':
		convsize = sizeof(void *);
		*argclass = ARGCLASS_INTEGER;
		break;
	case 'f':
		convsize = long_mod ? sizeof(double) : sizeof(float);
		/* must be set before leaving the switch */
		*argclass = ARGCLASS_FP;
		break;
	case 's':
		convsize = sizeof(char *);
		*argclass = ARGCLASS_INTEGER;
		break;
	case '%':
		convsize = 0;
		*argclass = ARGCLASS_NONE;
		break;
	default:
		/* fmt itself is the start of the offending conversion */
		fprintf(stderr, "Unknown conversion specifier %c "
			"in fmt starting with %s", p[0], fmt);
		return (size_t)-2;
	}
	return convsize;
}

#ifdef __x86_64__
/*
 * Dump the six integer registers stored in regs to stdout, one per line,
 * as unsigned decimal values.
 */
void
reg_save_area_print(struct reg_save_area *regs)
{
	/*
	 * The fields are uint64_t; convert explicitly so the conversion
	 * specifier matches the argument type on every platform.
	 */
#define preg(r)	printf("\t" #r "\t%llu\n", (unsigned long long)regs->r)
	preg(rdi);
	preg(rsi);
	preg(rdx);
	preg(rcx);
	preg(r8);
	preg(r9);
#undef preg
}

/*
 * Dump the address and the four ABI fields of valist to stdout.
 * The non-ABI overflow_arg_area_save field is not printed.
 */
void
va_list_print(struct my_va_list *valist)
{
	/* %p requires a void *, and the offsets are unsigned 32-bit values */
	printf("va_list (@%p)\n", (void *)valist);
	printf("\tgp_offset %u\n", (unsigned int)valist->gp_offset);
	printf("\tfp_offset %u\n", (unsigned int)valist->fp_offset);
	printf("\toverflow_arg_area %p\n", valist->overflow_arg_area);
	printf("\treg_save_area %p\n", (void *)valist->reg_save_area);
}


/*
 * Append one integral argument to the va_list under construction.
 *
 * valist:    va_list being built; ->gp_offset is used as the fill level
 *            of the register save area while building.
 * val:       address of the value; it may be unaligned.
 * valsize:   size of the value in bytes: 1, 2, 4 or 8. Any other size
 *            terminates the program.
 * stacksize: number of bytes already stored in the overflow ("stack")
 *            area; updated when the value lands there.
 *
 * The value is zero-extended to 64 bits and stored in the next free
 * register slot, or appended to the heap-allocated overflow area once all
 * six register slots are used.
 *
 * Returns 0 on success, -1 if the overflow area could not be grown (the
 * previously stored arguments stay valid and owned by valist).
 */
static int
va_list_push_integral(struct my_va_list *valist, void *val, size_t valsize,
		     size_t *stacksize)
{
	/* six 8-byte general purpose register slots */
	const size_t gp_area_size = 6 * sizeof(uint64_t);
	uint8_t v8;
	uint16_t v16;
	uint32_t v32;
	uint64_t r = 0;
	void *stack;

	/*
	 * Read exactly valsize bytes. memcpy() keeps this safe for
	 * unaligned blob offsets and never reads past the value.
	 */
	switch (valsize) {
	case sizeof(v8):
		memcpy(&v8, val, sizeof(v8));
		r = v8;
		break;
	case sizeof(v16):
		memcpy(&v16, val, sizeof(v16));
		r = v16;
		break;
	case sizeof(v32):
		memcpy(&v32, val, sizeof(v32));
		r = v32;
		break;
	case sizeof(r):
		memcpy(&r, val, sizeof(r));
		break;
	default:
		/* errno is meaningless here, so use errx() rather than err() */
		errx(1, "unsupported integral argument size %zu", valsize);
	}

	/* we always need to push the full 8 bytes */
	if (valist->gp_offset + sizeof(r) <= gp_area_size) {	/* got a free reg */
		memcpy((char *)valist->reg_save_area + valist->gp_offset,
		       &r, sizeof(r));
		valist->gp_offset += sizeof(r);
		return 0;
	}

	/* push to "stack" */
	stack = realloc(valist->overflow_arg_area, *stacksize + sizeof(r));
	if (stack == NULL)
		return -1;
	/*
	 * Keep a pointer to the start of the allocated memory block so
	 * we can free it later. We need to update it after every realloc().
	 */
	valist->overflow_arg_area = stack;
	valist->overflow_arg_area_save = stack;
	memcpy((char *)stack + *stacksize, &r, sizeof(r));
	*stacksize += sizeof(r);
	return 0;
}

/*
 * Reset the general purpose register cursor of valist to the first slot.
 * Only ->gp_offset is touched; every other field is left as it is.
 */
static void
va_list_rewind(struct my_va_list *valist)
{
	const uint32_t first_slot = 0;

	valist->gp_offset = first_slot;
}

/*
 * Release the memory owned by valist: the register save area and the
 * overflow area. The overflow area is freed through
 * ->overflow_arg_area_save because ->overflow_arg_area may have been
 * advanced by a va_arg() consumer.
 *
 * All three pointers are cleared afterwards, so calling this again on
 * the same object is harmless.
 */
static void
va_list_cleanup(machine_va_list *valist)
{
	/* free(NULL) is a no-op, no guards needed */
	free(valist->reg_save_area);
	free(valist->overflow_arg_area_save);
	valist->reg_save_area = NULL;
	valist->overflow_arg_area = NULL;
	valist->overflow_arg_area_save = NULL;
}

/*
 * Build a va_list for fmt out of the packed argument data in blob.
 *
 * valist:   receives the va_list; on success it owns heap memory that
 *           must be released with va_list_cleanup().
 * fmt:      printf format string describing the blob contents. Only
 *           conversions of class ARGCLASS_INTEGER and "%%" are accepted.
 * blob:     the argument values, stored back to back using the sizes
 *           reported by conversion_size().
 * blobsize: number of bytes in blob; fmt has to consume exactly this many.
 *
 * Returns 0 on success. Returns -1 on allocation failure, on an
 * unsupported or unparsable conversion, or when blobsize does not match
 * fmt; in that case everything allocated here has been freed and *valist
 * is zeroed (if the initial allocation fails, *valist is left untouched).
 */
static int
va_list_from_blob(machine_va_list *valist, const char *fmt, char *blob, size_t blobsize)
{
	struct reg_save_area *regs;
	enum argument_class argclass;
	const char *f;
	size_t sz;
	size_t stacksize = 0;

	/* zeroed, so slots that never receive an argument read as 0 */
	if (!(regs = calloc(1, sizeof(*regs))))
		return -1;
	*valist = (struct my_va_list) {
		.gp_offset = 0,
		.fp_offset = 0,
		.overflow_arg_area = NULL,
		.reg_save_area = regs,
		.overflow_arg_area_save = NULL,
	};

	for (f = fmt; *f != '\0'; ++f) {
		if (*f != '%')
			continue;
		sz = conversion_size(f, &argclass);
		if (argclass == ARGCLASS_INTEGER) {
			if (blobsize < sz) {
				fprintf(stderr, "not enough data available "
					"for format: %s", fmt);
				goto fail;
			}
			if (va_list_push_integral(valist, blob, sz, &stacksize))
				goto fail;
			blob += sz;
			blobsize -= sz;
		} else if (argclass != ARGCLASS_NONE) {
			goto fail;
		}
		/*
		 * walk past the '%'. A conversion was recognised, so the
		 * next character is not the terminator; this also keeps the
		 * second '%' of "%%" from being parsed again.
		 */
		++f;
	}
	if (blobsize) {
		fprintf(stderr, "Couldn't consume all data for format %s "
			"(%zu bytes left over)\n", fmt, blobsize);
		goto fail;
	}
	va_list_rewind(valist);
	return 0;

fail:
	/* don't leak the register area or the overflow block */
	va_list_cleanup(valist);
	memset(valist, 0, sizeof(*valist));
	return -1;
}
#elif __i386__

/*
 * Free the argument buffer referenced by *valist and clear the pointer,
 * so that calling this again on the same object is harmless.
 */
static void
va_list_cleanup(machine_va_list *valist)
{
	/* free(NULL) is a no-op, no guard needed */
	free(*valist);
	*valist = NULL;
}

/*
 * Build an argument buffer for fmt out of the packed data in blob.
 *
 * valist:   receives a heap buffer holding the arguments back to back
 *           (NULL if fmt consumes no arguments); release it with
 *           va_list_cleanup().
 * fmt:      printf format string describing the blob contents.
 * blob:     the argument values, stored back to back using the sizes
 *           reported by conversion_size().
 * blobsize: number of bytes in blob; fmt has to consume exactly this many.
 *
 * Integer arguments narrower than int are widened to int, the way C
 * promotes them when they are passed to a variadic function.
 *
 * Returns 0 on success. Returns -1 on allocation failure, on an
 * unparsable conversion or when blobsize does not match fmt; in that case
 * nothing is leaked and *valist is NULL.
 */
static int
va_list_from_blob(machine_va_list *valist, const char *fmt, char *blob, size_t blobsize)
{
	const char *f;
	const void *src;
	char *n, *grown;
	size_t bytes, sz, slot;
	enum argument_class argclass;
	short half;
	int promoted;

	*valist = NULL;
	n = NULL;
	bytes = 0;
	for (f = fmt; *f != '\0'; ++f) {
		if (*f != '%')
			continue;
		sz = conversion_size(f, &argclass);
		/* conversion_size() signals failure with (size_t)-1 / -2 */
		if (sz >= (size_t)-2)
			goto fail;
		if (blobsize < sz) {
			fprintf(stderr, "not enough data available "
				"for format: %s", fmt);
			goto fail;
		}
		if (argclass != ARGCLASS_NONE) {
			if ((argclass == ARGCLASS_INTEGER) && (sz < sizeof(int))) {
				/* do C integer promotion */
				if (sz == 1) {
					promoted = *blob;
				} else {
					/* blob offsets may be unaligned */
					memcpy(&half, blob, sizeof(half));
					promoted = half;
				}
				src = &promoted;
				slot = sizeof(promoted);
			} else {
				src = blob;
				slot = sz;
			}
			/* keep n valid if realloc() fails so it can be freed */
			if (!(grown = realloc(n, bytes + slot)))
				goto fail;
			n = grown;
			memcpy(n + bytes, src, slot);
			bytes += slot;
			blob += sz;
			blobsize -= sz;
		}
		/*
		 * walk past the '%'. A conversion was recognised, so the
		 * next character is not the terminator; this also keeps the
		 * second '%' of "%%" from being parsed again.
		 */
		++f;
	}
	if (blobsize) {
		fprintf(stderr, "Couldn't consume all data for format %s "
			"(%zu bytes left over)\n", fmt, blobsize);
		goto fail;
	}
	*valist = n;
	return 0;

fail:
	free(n);
	return -1;
}
#else
#error "Don't know how to get a va_list on this platform"
#endif

#ifdef __x86_64__
/*
 * x86_64 self test: fill a blob with the byte values 0, 1, 2, ..., turn
 * it into a va_list for myfmt, dump the result and print it through
 * vfprintf().
 */
int
main(void)
{
	struct my_va_list valist;
#if 0
	int blob[100];
	((int *)blob)[0] = 0x17;
	((int *)blob)[1] = 0x18;
	((int *)blob)[2] = 0x19;
	((int *)blob)[3] = 0xffeeddcc;
	((int *)blob)[4] = 0xbbaa9988;
	((int *)blob)[5] = 0x45;
	((int *)blob)[6] = 0x46;
	((int *)blob)[7] = 0x47;
	((int *)blob)[8] = 0x48;
#else
	char blob[100], *p;
	char i;
	i = 0;
	for (p = blob; p < (blob + sizeof(blob)); ++p, ++i) {
		p[0] = i;
	}
#endif


	/* the sizes below are the blob sizes of the conversions in myfmt */
#define myfmt "%hd %zd %d %lx %#x %td %c %#llx %jx\n"
	if (va_list_from_blob(&valist, myfmt, (char *)blob, 2 + 8 + 4 + 8 + 4 + 8 + 1 + 8 + 8))
		errx(1, "Couldn't create va_list");
	va_list_print(&valist);
	reg_save_area_print(valist.reg_save_area);
	//examine_va(NULL, &valist);
	/*
	 * struct my_va_list mirrors the layout of the ABI va_list element,
	 * but it is a distinct type; go through void * so the argument
	 * converts without an incompatible-pointer diagnostic.
	 */
	vfprintf(stdout, myfmt, (void *)&valist);
	va_list_cleanup(&valist);
	return 0;
}
#elif __i386__
/*
 * i386 self test: fill a blob with the byte values 0, 1, 2, ..., turn it
 * into an argument buffer for myfmt and print it through vfprintf().
 */
int main(void)
{
	machine_va_list valist;
	char blob[100], *p;
	char i;
	i = 0;
	for (p = blob; p < (blob + sizeof(blob)); ++p, ++i) {
		p[0] = i;
	}
	/* the sizes below are the blob sizes of the conversions in myfmt */
#define myfmt "%hd %zd %d %lx %#x %td %c %#llx %jx\n"
	if (va_list_from_blob(&valist, myfmt, (char *)blob, 2 + 4 + 4 + 4 + 4 + 4 + 1 + 8 + 8))
		errx(1, "Couldn't create va_list");
	/*
	 * valist itself points at the argument buffer, so that pointer --
	 * not the address of the local variable holding it -- is what has
	 * to be handed to vfprintf().
	 */
	vfprintf(stdout, myfmt, (va_list)valist);
	va_list_cleanup(&valist);
	return 0;
}
#endif
