/*
 * Copyright 2015 Canonical Ltd.
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranties of
 * MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *     Lars Uebernickel <lars.uebernickel@canonical.com>
 */

#include <gio/gio.h>
#include <stdlib.h>
#include <locale.h>

/* Column indices of one tab-separated line of admin1Codes.txt. */
enum
{
  /* Lookup key; handle_city_line() queries it as "<country code>.<admin1 code>". */
  ADMIN1_ID         = 0,
  /* Display name stored as the value in the admin1 table. */
  ADMIN1_NAME       = 1,
  ADMIN1_ASCII_NAME = 2,
  /* Fourth column; nothing in this file reads it. */
  ADMIN1_UNKNOWN    = 3
};

/*
 * Column indices of one tab-separated line of cities15000.txt.
 * Columns marked "used" are the ones handle_city_line() reads.
 */
enum
{
  CITIES_ID                      = 0,
  CITIES_NAME                    = 1,   /* used: city name */
  CITIES_ASCIINAME               = 2,
  CITIES_ALTERNATE_NAMES         = 3,
  CITIES_LATITUDE                = 4,   /* used: parsed as a double */
  CITIES_LONGITUDE               = 5,   /* used: parsed as a double */
  CITIES_FEATURE_CLASS           = 6,   /* used: must start with 'P' */
  CITIES_FEATURE_CODE            = 7,   /* used: "PPLX" entries are skipped */
  CITIES_COUNTRY_CODE            = 8,   /* used: key into the countries table */
  CITIES_ALTERNATE_COUNTRY_CODES = 9,
  CITIES_ADMIN1                  = 10,  /* used: second half of the admin1 key */
  CITIES_ADMIN2                  = 11,
  CITIES_ADMIN3                  = 12,
  CITIES_ADMIN4                  = 13,
  CITIES_POPULATION              = 14,  /* used: parsed as an unsigned integer */
  CITIES_ELEVATION               = 15,
  CITIES_DEM                     = 16,
  CITIES_TIMEZONE                = 17,  /* used: stored verbatim (normalized) */
  CITIES_MODIFICATION_DATE       = 18
};

/*
 * Column indices of one tab-separated line of countryInfo.txt.
 * Only COUNTRIES_ISO and COUNTRIES_NAME are read in this file.
 */
enum
{
  COUNTRIES_ISO                = 0,   /* key of the countries table */
  COUNTRIES_ISO3               = 1,
  COUNTRIES_ISO_NUMERIC        = 2,
  COUNTRIES_FIPS               = 3,
  COUNTRIES_NAME               = 4,   /* value of the countries table */
  COUNTRIES_CAPITAL            = 5,
  COUNTRIES_AREA               = 6,
  COUNTRIES_POPULATION         = 7,
  COUNTRIES_CONTINENT          = 8,
  COUNTRIES_TLD                = 9,
  COUNTRIES_CURRENCYCODE       = 10,
  COUNTRIES_CURRENCYNAME       = 11,
  COUNTRIES_PHONE              = 12,
  COUNTRIES_POSTAL_CODE_FORMAT = 13,
  COUNTRIES_POSTAL_CODE_REGEX  = 14,
  COUNTRIES_LANGUAGES          = 15,
  COUNTRIES_GEONAMEID          = 16,
  COUNTRIES_NEIGHBOURS         = 17,
  COUNTRIES_EQUIVALENTFIPSCODE = 18
};

/*
 * Line callback for admin1Codes.txt.
 *
 * @fields:    columns of the current line, indexed by ADMIN1_*
 * @user_data: the GHashTable to fill (code -> name)
 * @error:     unused; this handler cannot fail
 *
 * Stores private copies of the code and the name in the table.
 */
static void
handle_admin1_line (gchar    **fields,
                    gpointer   user_data,
                    GError   **error)
{
  GHashTable *names_by_code = user_data;
  gchar *code = g_strdup (fields[ADMIN1_ID]);
  gchar *name = g_strdup (fields[ADMIN1_NAME]);

  g_hash_table_insert (names_by_code, code, name);
}

/*
 * Line callback for countryInfo.txt.
 *
 * @fields:    columns of the current line, indexed by COUNTRIES_*
 * @user_data: the GHashTable to fill (ISO code -> country name)
 * @error:     unused; this handler cannot fail
 *
 * Stores private copies of the ISO code and the country name in the table.
 */
static void
handle_country_line (gchar    **fields,
                     gpointer   user_data,
                     GError   **error)
{
  GHashTable *names_by_iso = user_data;
  gchar *iso = g_strdup (fields[COUNTRIES_ISO]);
  gchar *name = g_strdup (fields[COUNTRIES_NAME]);

  g_hash_table_insert (names_by_iso, iso, name);
}

/*
 * Builds a string GVariant from the G_NORMALIZE_ALL_COMPOSE form of @str.
 *
 * The normalized copy is handed over to the variant, so @str itself is
 * left untouched and remains owned by the caller.
 */
static GVariant *
variant_new_normalize_string (const gchar *str)
{
  return g_variant_new_take_string (g_utf8_normalize (str, -1, G_NORMALIZE_ALL_COMPOSE));
}

/* State handed to handle_city_line() as its user_data. */
typedef struct
{
  /* admin1 id -> admin1 name, filled by handle_admin1_line() */
  GHashTable *admin1;
  /* country ISO code -> country name, filled by handle_country_line() */
  GHashTable *countries;
  /* accumulates one tuple per accepted city; initialized and ended in main() */
  GVariantBuilder builder;
} CityData;

/*
 * Line callback for cities15000.txt.
 *
 * @fields:    columns of the current line, indexed by CITIES_*
 * @user_data: the CityData with the admin1/countries lookup tables and
 *             the builder that receives the output
 * @error:     unused; lines that do not qualify are silently skipped
 *
 * Appends one tuple per accepted city to the builder, in this order:
 * name, admin1 name, country name, timezone, population, country code,
 * latitude, longitude.  All strings are stored in normalized form.
 */
static void
handle_city_line (gchar    **fields,
                  gpointer   user_data,
                  GError   **error)
{
  CityData *data = user_data;
  g_autofree gchar *index = NULL;
  const gchar *admin1;
  const gchar *country;
  guint32 population;

  /* only include cities and villages and ignore sections of other places (PPLX) */
  if (fields[CITIES_FEATURE_CLASS][0] != 'P' ||
      g_str_equal (fields[CITIES_FEATURE_CODE], "PPLX"))
    return;

  /* The documentation states that "00" is used for cities without a
   * specified admin1 zone. However, it is sometimes set to the empty
   * string (or even other, non-existing codes). This tool is not useful
   * for integrity checks anyway, so simply skip any city whose
   * "<country>.<admin1>" key is not listed in admin1Codes.txt.
   */
  index = g_strdup_printf ("%s.%s", fields[CITIES_COUNTRY_CODE], fields[CITIES_ADMIN1]);
  admin1 = g_hash_table_lookup (data->admin1, index);
  if (admin1 == NULL)
    return;

  /* Likewise, skip cities without an associated country */
  country = g_hash_table_lookup (data->countries, fields[CITIES_COUNTRY_CODE]);
  if (country == NULL)
    return;

  /* The "u" format reads a 32-bit unsigned value from the argument list,
   * while strtoul() yields an unsigned long; convert explicitly so the
   * variadic argument has the type the format expects.
   */
  population = (guint32) strtoul (fields[CITIES_POPULATION], NULL, 10);

  g_variant_builder_add (&data->builder, "(@s@s@s@su@sdd)",
                         variant_new_normalize_string (fields[CITIES_NAME]),
                         variant_new_normalize_string (admin1),
                         variant_new_normalize_string (country),
                         variant_new_normalize_string (fields[CITIES_TIMEZONE]),
                         population,
                         variant_new_normalize_string (fields[CITIES_COUNTRY_CODE]),
                         g_ascii_strtod (fields[CITIES_LATITUDE], NULL),
                         g_ascii_strtod (fields[CITIES_LONGITUDE], NULL));
}

/*
 * Reads a tab-separated GeoNames file line by line and feeds every data
 * line to @callback.
 *
 * @file:      file to read
 * @n_columns: exact number of tab-separated fields every data line must have
 * @callback:  invoked as callback (fields, user_data, error) for each data
 *             line; @fields is only valid for the duration of the call
 * @user_data: passed through to @callback
 * @error:     return location for an error, or NULL
 *
 * Lines whose first character is '#' are skipped (but still counted for
 * the line number used in error messages).
 *
 * Returns TRUE once the end of the file is reached.  Returns FALSE with
 * @error set when the file cannot be opened, a line cannot be read or is
 * not valid UTF-8, or a data line does not have @n_columns fields.
 */
static gboolean
parse_geo_names_file (GFile     *file,
                      guint      n_columns,
                      void     (*callback) (gchar **, gpointer, GError **),
                      gpointer   user_data,
                      GError   **error)
{
  g_autoptr(GFileInputStream) filestream = NULL;
  g_autoptr(GDataInputStream) datastream = NULL;
  guint line_nr = 0;

  filestream = g_file_read (file, NULL, error);
  if (filestream == NULL)
    return FALSE;

  datastream = g_data_input_stream_new (G_INPUT_STREAM (filestream));

  for (;;)
    {
      g_autofree gchar *line = NULL;
      g_auto(GStrv) fields = NULL;
      GError *read_error = NULL;

      line = g_data_input_stream_read_line_utf8 (datastream, NULL, NULL, &read_error);
      if (line == NULL)
        {
          /* NULL without an error means end of stream.  Checking the
           * error (rather than the returned length) is the only reliable
           * way to tell the two apart, because the length is not written
           * when the read itself fails.
           */
          if (read_error == NULL)
            return TRUE;

          g_propagate_error (error, read_error);
          return FALSE;
        }

      line_nr++;

      if (line[0] == '#')
        continue;

      fields = g_strsplit (line, "\t", 0);
      if (g_strv_length (fields) != n_columns)
        {
          g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
                       "line %u doesn't contain %u fields", line_nr, n_columns);
          return FALSE;
        }

      callback (fields, user_data, error);
    }
}

int
main (int argc, char **argv)
{
  g_autoptr(GFile) dir = NULL;
  g_autoptr(GFile) admin1_file = NULL;
  g_autoptr(GFile) countries_file = NULL;
  g_autoptr(GFile) cities_file = NULL;
  g_autoptr(GError) error = NULL;
  g_autoptr(GVariant) v = NULL;
  CityData data;

  setlocale (LC_ALL, "");

  dir = g_file_new_for_path (argc == 2 ? argv[1] : ".");
  admin1_file = g_file_get_child (dir, "admin1Codes.txt");
  countries_file = g_file_get_child (dir, "countryInfo.txt");
  cities_file = g_file_get_child (dir, "cities15000.txt");

  data.admin1 = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
  data.countries = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
  g_variant_builder_init (&data.builder, G_VARIANT_TYPE ("a(ssssusdd)"));

  if (!parse_geo_names_file (admin1_file, 4, handle_admin1_line, data.admin1, &error))
    {
      g_printerr ("Unable to read admin file: %s\n", error->message);
      return 1;
    }

  if (!parse_geo_names_file (countries_file, 19, handle_country_line, data.countries, &error))
    {
      g_printerr ("Unable to read countries file: %s\n", error->message);
      return 1;
    }

  if (!parse_geo_names_file (cities_file, 19, handle_city_line, &data, &error))
    {
      g_printerr ("Unable to read cities file: %s\n", error->message);
      return 1;
    }

  v = g_variant_builder_end (&data.builder);
  if (!g_file_set_contents ("cities.compiled", g_variant_get_data (v), g_variant_get_size (v), &error))
    {
      g_printerr ("Unable to write output: %s\n", error->message);
      return 1;
    }

  g_hash_table_unref (data.admin1);
  g_hash_table_unref (data.countries);

  return 0;
}
