#!/usr/bin/perl
#This script takes a TAB DELIMITED text file that specifies group (species), name, description, latdec and longdec
#for locations (samples or populations) to be mapped in Google Earth or similar via a kml file.
#This script was originally written to process a comma separated values text file but was changed to handle
#tab-delimited text in order to allow commas to be included in the text descriptions for locations.
#This script makes a very simple kml file where samples/locations are grouped into folders according
#to a group id (e.g. species).
#There is no style encoding in the resulting kml file. Google Earth will use the defaults. For example
#the icon used to mark locations on the Google Earth map is not specified in the kml file and Google
#Earth will use its default yellow pushpin.
#Style elements can be edited in Google Earth.
#Note that there is very little error trapping.

#Dan Ebert September 2012.

use warnings;
use strict;
use File::Basename;

#Without an input file argument there is nothing to convert: show the usage text on stdout
#and stop with a non-zero exit status.
unless(defined $ARGV[0]){
	my @usage = (
		"\n\n\nMissing argument!\n\n",
		"Please enter the full filename for a tab-delimited input file\n",
		"when invoking 'texttokml.pl'.\n\n",
		"The input file must contain samples/locations as rows\n",
		"with five columns of data for each:\n",
		"\t1. Group ID (e.g. species).\n",
		"\t2. Name (the id of the sample/population/location).\n",
		"\t3. Description (other text about sample/population/location).\n",
		"\t4. Latitude.\n",
		"\t5. Longitude.\n",
		"Latitude and longitude can be in any format that Google Earth\n",
		"or other kml viewer/editor reads (i.e. not necessarily actual lat/longs).\n",
		"This file must be a tab-delimited text file!\n",
	);
	print @usage;
	die "\n";
}

#Take the input filename from the first command-line argument and open it for reading.
#The three-argument form of open is used so that characters in the filename (such as
#'>', '|' or leading/trailing spaces) are never interpreted as an open mode or a command.
my $infile = shift;
open(INFILE, '<', $infile) or die "Couldn't open input file: $infile.\n$!";

#fileparse (File::Basename) splits the input name into base name, directory and extension.
#A .csv or .txt extension is stripped from the base name; any other extension is kept as part of it.
#The directory part returned by fileparse already ends with the path separator (it is "./" when
#the input has no directory), so it is joined to the base name without adding another separator.
#The output file has the same location and base name as the input, with a .kml extension.
my($outfile, $outpath, $inext) = fileparse($infile, ".csv", ".txt");
$outfile = "$outpath$outfile.kml";
open(OUTFILE, '>', $outfile) or die "Couldn't open output file: $outfile.\n$!";

#Write the fixed preamble of the kml file: the xml declaration, the kml root element with its
#namespace, and the opening tag of the single Document element that holds everything else.
print OUTFILE join("\n",
	'<?xml version="1.0" encoding="UTF-8"?>',
	'<kml xmlns="http://www.opengis.net/kml/2.2">',
	"\t<Document>",
), "\n";

#Cycling through Google's coloured pushpin icons for each group was considered but not implemented,
#so no icon or other style information is written for folders or placemarks.

#Escape the characters that are special in XML so that free text from the input file cannot
#break the structure of the kml document. The input is treated as plain text, so text that is
#already entity-encoded (e.g. "&amp;") will be encoded again.
#Takes one string and returns the escaped copy.
sub xml_escape {
	my ($text) = @_;
	$text =~ s/&/&amp;/g; #must come first so the entities added below are not escaped again
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	return $text;
}

my $foldername; #group id of the folder currently open; undefined until the first row is accepted
my $ngroups = 0;
my $nsamples = 0;

#Read the input one line at a time. Each accepted line supplies 5 columns of data -
#group, name, description, lat, long - and becomes one Placemark.
#A change in group id between consecutive rows closes the current Folder and opens a new one,
#so rows of the same group must be adjacent in the input to end up in a single folder.
while(my $line = <INFILE>){
	$line =~ s/[\r\n]+\z//; #strip the line ending, including the carriage return of Windows (CRLF) files
	my @placedata = split(/\t/, $line); #tab-delimited so that commas can appear in the text columns
	next if(scalar(@placedata)<5); #skip lines with fewer than 5 columns of data
	$nsamples++;
	#only the first five columns are used; any further columns are ignored
	my($groupid, $name, $description, $latdec, $longdec) = map { xml_escape($_) } @placedata[0..4];
	if(!defined $foldername or $groupid ne $foldername){ #catch new groups/species
		$ngroups++;
		$foldername = $groupid;
		if($ngroups >1){print OUTFILE "\t\t</Folder>\n"} #there is no previous Folder to close on the first group
		print OUTFILE "\t\t<Folder>\n"; #new folder
		print OUTFILE "\t\t<name>$foldername</name>\n";
	}
	print OUTFILE "\t\t\t<Placemark>\n";
	print OUTFILE "\t\t\t\t<name>$name</name>\n";
	print OUTFILE "\t\t\t\t<description>$description.</description>\n"; #a full stop is appended to the description
	print OUTFILE "\t\t\t\t<Point>\n";
	print OUTFILE "\t\t\t\t\t<coordinates>$longdec,$latdec,0</coordinates>\n"; #kml order is longitude,latitude,altitude
	print OUTFILE "\t\t\t\t</Point>\n";
	print OUTFILE "\t\t\t</Placemark>\n";
	print OUTFILE "\n";
}
#Close the last group folder. A Folder is only opened once a group has been counted, so the
#closing tag is written only in that case; otherwise the kml file would be malformed.
if($ngroups > 0){print OUTFILE "\t\t</Folder>\n"}
print OUTFILE "\t</Document>\n"; #close Document tag
print OUTFILE "</kml>\n"; #close kml tag.

#Buffered write errors (e.g. a full disk) are only reported when the handle is closed,
#so the close of the output file is checked before success is reported.
close(OUTFILE) or die "Couldn't finish writing output file: $outfile.\n$!";
close(INFILE);

print "\n\nDone.\n";
print "$nsamples samples from $ngroups groups written to $outfile.\n\n";
