#!/usr/bin/perl
#
# Copyright (c) 2006 Robert Schumann
# Licensed under GNU GPL, see http://www.gnu.org/copyleft/gpl.html
# 
#
# This program is intended to parse the WAITTtermlys from 
#   http://groups.yahoo.com/group/rekenaarterme
# into a form suitable to be posted on a MoinMoin wiki (specifically, on
#   https://wiki.ubuntu.com/AfrikaansTranslators)
#
# In order to achieve this, the following text processing must occur:
#
# 1. Print out unchanged the first few lines of the file, up to a line starting with -
# 2. Each line starting with a non-whitespace is the start of a new word definition.
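#    Change it from "A Definition\n" into "[[Anchor(A_Definition)]]\n A Definition:: "
#    (whitespace, parentheses and hyphens in the anchor name become underscores).
# 3. Change {other def} into [#other_def], using the same anchor naming as in 2.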
# 4. (( and )) replaced by ''', which in MoinMoin is boldface
# 5. <(.+)> replaced by italic form, ''<$1>''
# 6. Tidy up: replace "(\w)\n\t(\w)" with "$1 $2" and "\n\n\t" into "\n\t1.", which
#    causes different definitions of the same word to be MoinMoin auto-numbered.
# 7. Remove existing numbering: "\s\d\.\s" and "\bI+\b" become " ".  At the moment there
#    is no Roman numbering above III
# 8. Bring dates onto the same line as the definition they refer to, in italics, i.e.
#    "\n\t(1999-12-14)" becomes "\t ''(1999-12-14)''"
#
# TODO by hand, before processing:
# * add a newline between "force" and "forseer"
# * add a tab in front of "kyk ook {sibling}"
# * add a newline between "invoke" and "in werking stel"
# * add a newline between "in my humble opinion (IMHO)" and "na my beskeie"
# * remove "END" from the last line of the file
# * "hang up" only has one bracket on the "(v))", change it to "((v))"


use strict;
use warnings;

# Turn a headword or a cross-reference into a MoinMoin anchor name: every
# whitespace character, parenthesis and hyphen becomes an underscore.  It is
# used for both the [[Anchor(...)]] targets and the [#...] links, so the two
# can never drift apart.
sub anchor_name {
	my ($text) = @_;
	$text =~ s/[\s\(\)-]/_/g;
	return $text;
}

my $path = $ARGV[0];
(defined $path && length $path)
	or die("Please supply the name of an input dictionary file on the command line");

# Three-argument open, so that characters in the file name are never taken as
# an open mode or a pipe.  "-" still means standard input, as it did with the
# two-argument form.
my $in;
if ($path eq '-') {
	$in = \*STDIN;
}
else {
	open($in, '<', $path) or die("can't open $path: $!");
}

# Number 1: copy the preamble verbatim inside a {{{ }}} block.  The first line
# starting with "-" ends the preamble and is itself discarded.  The defined()
# test stops the loop at end of file when no such line exists (comparing the
# undef returned at EOF against the pattern would otherwise loop forever).
my @output = ("{{{");
while (defined(my $line = <$in>)) {
	last if $line =~ /^-/;
	push @output, $line;
}
push @output, "}}}";

# Read in all definitions, and then split on <newline><wordboundary>, i.e. at
# every line that starts with a word character.
my $wholefile = join('', <$in>);
close($in);
my @alldefs = split(/\n\b/, $wholefile);

# Put all definitions into a hash: headword line => everything after it.
my %dict = ();
for my $entry (@alldefs) {
	# An entry without a body does not match.  Skip it rather than reusing the
	# captures left over from the previous entry.
	unless ($entry =~ /([^\n]+)(\n.*)/s) {
		warn("skipping entry without a definition: $entry\n") if $entry =~ /\S/;
		next;
	}
	$dict{$1} = $2;
}

# Now process it all, in sorted headword order.
for my $word (sort keys %dict) {
	# Number 2: anchor line followed by a MoinMoin definition-list term.
	my $fancyword = join("", "\n\n[[Anchor(", anchor_name($word), ")]]\n ", $word, ":: ");

	my $defstring = $dict{$word};
	# Number 8: pull a date on its own line up onto the previous line, in italics.
	$defstring =~ s/\n\t(\(\d+-\d+-\d+\))/\t ''$1''/g;

	# A blank line followed by a tab separates alternative definitions.
	my @defs = split(/\n\n\t/, $defstring);
	for my $def (@defs) {	# $def aliases the element, so @defs is edited in place
		chomp($def);
		$def =~ s/\n\t/ /g;	# Number 6: join continuation lines
		$def =~ s/^\s+//;	# Number 6: drop leading whitespace
		# Number 3: {other def} becomes an internal link to that entry's anchor.
		$def =~ s{\{([^\}]+)\}}{'[#' . anchor_name($1) . ']'}ge;
		$def =~ s/[\(\)]{2}/'''/g;	# Number 4: (( and )) become boldface markers
		$def =~ s/(<[^>]+>)/''$1''/g;	# Number 5: <...> becomes italic
		$def =~ s/\s\d\.\s|\bI+\b/ /g;	# Number 7: strip existing numbering
		# Number 6: let MoinMoin auto-number the definitions.  length() rather
		# than truth, so that a definition consisting of "0" is numbered too.
		$def = "\t1. $def" if length($def);
	}
	push @output, $fancyword, join("\n", @defs);
}

# Interpolating the array joins its elements with a single space ($"); the
# output layout depends on that, so it is kept as is.
print "@output\n";
