#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Any line that begins with # is passed through unchanged
#define SKIPCPPLINES

// Any line that begins with ## is stripped of ##, then passed through unchanged
#define SKIPESCAPEDLINES

/* Call after consuming character `ch` from `in`. When `ch` is a newline, add
   to `line` one for that newline plus the number of lines cpp_check() reports
   having passed through to `out`.

   `line` must be an int lvalue holding the line number (write `*line` when
   all you have is a pointer to the counter). Every argument is parenthesised
   so that expression arguments expand safely. */
#define nl_check(ch, line, in, out) \
	do { \
		if ((ch) == '\n') \
			(line) += 1 + cpp_check ((in), (out)); \
	} while (0)

// Open argv[1] for reading into *in and argv[2] for writing into *out; calls exit() on failure.
void open_files (int argc, char *argv[], FILE **in, FILE **out);
// Copy in to out, replacing ' and " outside tags with HTML quote entities.
void process (FILE *in, FILE *out);
// Consume one tag (the next character of in is '<'); *line is the running line counter.
void handle_tag (FILE *in, FILE *out, int *line);
// Consume one "--" comment inside a comment tag; the flag records that garbage was already reported.
void handle_comment (FILE *in, FILE *out, int *line, bool *in_bad_comment_tag);
// Pass consecutive lines beginning with '#' through to out; returns how many were passed.
int cpp_check (FILE *in, FILE *out);
// Close both streams.
void clean_up (FILE *in, FILE *out);

/* Program entry point: open the two files named on the command line, run the
   quote conversion from one to the other, then close them. Always returns 0
   when it returns at all. */
int main (int argc, char *argv[])
{
	FILE *in = NULL;
	FILE *out = NULL;

	// Does not return on failure: open_files calls exit() itself.
	open_files (argc, argv, &in, &out);

	process (in, out);

	clean_up (in, out);
	return 0;
}

/* Validate the command line and open the files it names.

   argv[1] is opened for reading and stored in *in; argv[2] is opened for
   writing and stored in *out. Exactly two filename arguments are required.
   On any problem a diagnostic is printed and the program exits with
   status 1, so the function only returns once both streams are open. */
void open_files (int argc, char *argv[], FILE **in, FILE **out)
{
	if (argc < 2)
	{
		/* argc can be 0 when the program is exec'd with an empty argument
		   vector; argv[0] is then not a usable string, so fall back to the
		   fixed program name instead of reporting a negative surplus of
		   arguments. */
		const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "truequotes";

		printf ("%s: missing input and output filenames\n", prog);
		exit (1);
	}

	if (argc == 2)
	{
		printf ("truequotes: missing output filename\n");
		exit (1);
	}

	if (argc > 3)
	{
		printf ("truequotes: %d too many arguments\n", argc - 3);
		exit (1);
	}

	*in = fopen (argv[1], "r");
	if (*in == NULL)
	{
		printf ("truequotes: error opening input file %s\n", argv[1]);
		exit (1);
	}

	*out = fopen (argv[2], "w");
	if (*out == NULL)
	{
		printf ("truequotes: error opening output file %s\n", argv[2]);
		exit (1);
	}
}

/* Comment tags get removed, because it makes it easier to prove correctness and the output files
   are not intended to be read or written by humans. Anything inside a tag is passed through without
   change.

   Microsoft Word works interactively. It has a simple rule that a quote after whitespace is a left
   quote, otherwise it is a right quote. Obviously, this is wrong. ("'Tricky' isn't the word I'd use.")
   But it seems to be a reasonable facsimile, and has the advantage that "bugs" earlier in the
   document don't cause problems later in the document. All HTML tags are treated as whitespace for
   this purpose. */

/* Copy `in` to `out` until end of input.

   Outside tags, ' becomes &lsquo; or &rsquo; and " becomes &ldquo; or &rdquo;:
   the left form when the previous character was whitespace (or a tag, or the
   start of the input), the right form otherwise. Tags are handed to
   handle_tag(). Every other character is copied unchanged. The running line
   number is kept only so that handle_tag() can use it in diagnostics. */
void process (FILE *in, FILE *out)
{
	const char LSQ[] = "&lsquo;", RSQ[] = "&rsquo;";
	const char LDQ[] = "&ldquo;", RDQ[] = "&rdquo;";

	// The start of the input counts as whitespace, so a leading quote opens.
	bool prev_ws = true;
	int ch, line = 1;

	// Lines passed through at the very top of the file count as lines too.
	line += cpp_check (in, out);

	// Comparing against EOF (rather than polling feof) also stops on a read error.
	while ((ch = getc (in)) != EOF)
	{
		switch (ch)
		{
		case '<':
			// handle_tag expects to read the '<' itself
			ungetc (ch, in);
			handle_tag (in, out, &line);

			// Tags are treated as whitespace for quote direction
			prev_ws = true;
			break;

		case '\'':
			fputs (prev_ws ? LSQ : RSQ, out);

			prev_ws = false;
			break;

		case '"':
			fputs (prev_ws ? LDQ : RDQ, out);

			prev_ws = false;
			break;

		default:
			putc (ch, out);

			prev_ws = isspace (ch) != 0;
			nl_check (ch, line, in, out);
			break;
		} // switch (ch)
	} // while not EOF
} // process (in, out)

// Discard a comment tag whose "<!" has already been read, up to and including its '>'.
static void skip_comment_tag (FILE *in, FILE *out, int *line, int tag_start_line)
{
	/* A comment tag begins with <!, contains zero or more
	   comments, and ends with >. A comment begins with --,
	   contains zero or more characters but does not
	   contain --, and ends with --. */

	// Only report garbage once per comment tag
	bool in_bad_comment_tag = false;
	int ch;

	while ((ch = getc (in)) != EOF)
	{
		switch (ch)
		{
		case '>':
			// End of comment tag
			return;

		case '-':
			// handle_comment expects to read the '-' itself
			ungetc (ch, in);
			handle_comment (in, out, line, &in_bad_comment_tag);
			break;

		default:
			if (! in_bad_comment_tag && ! isspace (ch))
			{
				printf ("truequotes: garbage in comment tag on line %d\n", *line);
				in_bad_comment_tag = true;
			}

			nl_check (ch, *line, in, out);
			break;
		} // switch ch
	} // while not EOF

	printf ("truequotes: unterminated comment tag starting on line %d\n", tag_start_line);
}

// Copy an ordinary tag to `out`; its '<' has been read and `ch` is the character that followed it.
static void copy_plain_tag (FILE *in, FILE *out, int *line, int ch)
{
	// True while between double quotes, where '>' does not close the tag
	bool in_dq = false;

	putc ('<', out);

	while (true)
	{
		/* The character right after '<' goes through the same rules as
		   every later one, so "<>" ends immediately and a leading '"'
		   opens a quoted span. */
		putc (ch, out);

		if (ch == '"')
		{
			in_dq = ! in_dq;
		}
		else if (ch == '>' && ! in_dq)
		{
			return;
		}
		else
		{
			nl_check (ch, *line, in, out);
		}

		ch = getc (in);
		if (ch == EOF)
		{
			printf ("truequotes: premature EOF at line %d\n", *line);
			return;
		}
	} // while true
}

/* There is a left-bracket ready to be read. Consume the next tag in
   the file, if any, or read to end of file.

   A comment tag ("<!" ... ">") is removed from the output; anything in it
   that is neither a "--" comment nor whitespace is reported once. Any other
   tag is copied to `out` unchanged through its closing '>'.

   `*line` is advanced through nl_check for each newline consumed, which may
   also pass '#' lines through to `out`. If the input ends before the tag is
   closed, a diagnostic is printed and the function returns. */
void handle_tag (FILE *in, FILE *out, int *line)
{
	int tag_start_line = *line;
	int ch;

	// Eat "<"
	getc (in);

	// EOF is tested directly so a read error cannot be mistaken for data
	ch = getc (in);
	if (ch == EOF)
	{
		printf ("truequotes: unterminated tag starting on line %d\n", tag_start_line);
		return;
	}

	if (ch == '!')
	{
		skip_comment_tag (in, out, line, tag_start_line);
	}
	else
	{
		// Not a comment tag, pass it through
		copy_plain_tag (in, out, line, ch);
	}
}

/* Consume one comment inside a comment tag. The next character of `in` is
   the '-' that the caller saw and pushed back.

   If that '-' is followed by another '-', everything up to and including the
   closing "--" is discarded. If it is not, the tag contains garbage: this is
   reported unless *in_bad_comment says it already was, *in_bad_comment is
   set, and the following character is left unread for the caller.

   `*line` is advanced through nl_check for each newline consumed, which may
   also pass '#' lines through to `out`. Reaching the end of input prints a
   diagnostic and returns. */
void handle_comment (FILE *in, FILE *out, int *line, bool *in_bad_comment)
{
	int comment_start_line;
	int ch;

	// Eat "-"
	getc (in);

	ch = getc (in);
	if (ch == EOF)
	{
		printf ("truequotes: bad comment on line %d\n", *line);
		return;
	}

	if (ch != '-')
	{
		if (! *in_bad_comment)
			printf ("truequotes: garbage in comment tag on line %d\n", *line);

		*in_bad_comment = true;

		/* Hand the character back instead of swallowing it: it may be
		   the '>' that closes the tag (as in "--->"), and the caller
		   also does the newline accounting for it. */
		ungetc (ch, in);
		return;
	}

	// Look for a closing --
	comment_start_line = *line;

	while ((ch = getc (in)) != EOF)
	{
		// `line` is a pointer here; the counter itself is *line
		nl_check (ch, *line, in, out);

		if (ch != '-')
			continue;

		ch = getc (in);
		if (ch == '-')
			return;

		// A single '-' is ordinary comment text; EOF is caught by the loop test
		nl_check (ch, *line, in, out);
	} // while not EOF

	printf ("truequotes: comment starting line %d runs to EOF\n", comment_start_line);
} // handle comment

/* Check for consecutive lines beginning with #, and pass them through.
   Return count of lines passed through.

   Must be called with `in` positioned at the start of a line. Reading stops
   at the first line that does not begin with '#'; that line's first
   character is pushed back so the caller sees it. When SKIPESCAPEDLINES is
   defined, a leading "##" is dropped and the rest of the line is copied;
   otherwise the whole line is copied. When SKIPCPPLINES is not defined the
   function does nothing and returns 0. */

#ifndef SKIPCPPLINES
int cpp_check (FILE *in, FILE *out)
{
	(void) in;
	(void) out;

	return 0;
}
#else
int cpp_check (FILE *in, FILE *out)
{
	int skipped = 0;

	while (true)
	{
		int ch = getc (in);
		if (ch == EOF)
			break;

		if (ch != '#')
		{
			// Not one of ours: leave the line for the caller
			ungetc (ch, in);
			break;
		}

#ifdef SKIPESCAPEDLINES
		// Just read a #. If the next one is also a #, then eat it. Otherwise, write both out

		ch = getc (in);
		if (ch == EOF)
		{
			putc ('#', out);
			break;
		}
		if (ch != '#')
		{
			putc ('#', out);
			putc (ch, out);
		}
#else
		// Just read a #. Write it out

		putc ('#', out);
#endif

		/* Copy the rest of this line to out. If the character just
		   handled was already the newline (a line holding only "#"),
		   the line is complete and the next line must not be copied
		   as if it were part of this one. */
		while (ch != '\n')
		{
			ch = getc (in);
			if (ch == EOF)
				break;
			putc (ch, out);
		}
		skipped++;
	}

	return skipped;
}
#endif

/* Close both streams.

   Output is buffered, so a failed write may only surface when the stream is
   flushed on close. If the output stream already recorded an error, or
   closing it fails, a diagnostic is printed and the program exits with
   status 1 instead of reporting success for a truncated file. */
void clean_up (FILE *in, FILE *out)
{
	// Must be sampled before fclose: the stream may not be used afterwards
	bool write_failed = ferror (out) != 0;

	fclose (in);

	if (fclose (out) != 0)
		write_failed = true;

	if (write_failed)
	{
		printf ("truequotes: error writing output file\n");
		exit (1);
	}
}
