// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
// Mobius Forensic Toolkit
// Copyright (C) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 Eduardo Aguiar
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
// Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#include <mobius/uri.h>
#include <mobius/string_functions.h>
#include <mobius/regex_impl.h>
#include <mobius/exception.inc>

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
// Syntax:
//
// 2.3. unreserved
//   A-Z, a-z, 0-9, "-", ".", "_", "~"
//
// 3.1. scheme
//   case insensitive
//
// 3.2. authority
//   [ userinfo "@" ] host [ ":" port] (3.2.1)
//   userinfo = (unreserved | pct-encoded | sub-delims | ":")
//      sometimes "user:password" (deprecated)
//   host = IP-literal | IPv4address | reg-name.
//     If it can be an IPv4 address then it is, instead of a reg-name (3.2.2)
//     case insensitive (3.2.2)
//     host allows '[' and ']' (IP-literal)
//     reg-name = (unreserved | pct-encoded | sub-delims)
//   port = (digit)*
//
//
// 3.3 path
//   dot-segments = "." and ".."
//   pchar = unreserved | pct-encoded | sub-delims | ":" | "@"
//   (pchar | "/")
//
// 3.4. query
//   (pchar | "/" | "?")*
//
// 3.5. fragment
//   (pchar | "/" | "?")*
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
namespace mobius
{
//! \brief RFC 3986 - section 2.3
//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
static constexpr const char *UNRESERVED_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";

//! \brief RFC 3986 - section 3.1
//! scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//! This is NOT the unreserved set: "+" is a valid scheme char (e.g. "svn+ssh")
//! and must not be percent-encoded, while "_" and "~" are not scheme chars.
static constexpr const char *ALLOWED_SCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-.";

//! \brief RFC 3986 - section 3.2
//! unreserved | sub-delims | ":" | "@" | "[" | "]"
static constexpr const char *ALLOWED_AUTHORITY = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&'()*+,;=:@[]";

//! \brief RFC 3986 - section 3.3
//! pchar | "/", where pchar = unreserved | sub-delims | ":" | "@"
static constexpr const char *ALLOWED_PATH = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&'()*+,;=:@/";

//! \brief RFC 3986 - section 3.4
//! pchar | "/" | "?"
static constexpr const char *ALLOWED_QUERY = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&'()*+,;=:@/?";

//! \brief RFC 3986 - section 3.5
//! same char set as the query part
static constexpr const char *ALLOWED_FRAGMENT = ALLOWED_QUERY;

//! \brief RFC 3986 - appendix B - URI regular expression
//! Capture groups read by uri::set_value: 2 = scheme, 4 = authority, 5 = path,
//! 7 = query, 9 = fragment. Groups 1, 3, 6 and 8 are the same parts together
//! with their delimiters (":", "//", "?" and "#").
//! The scheme class is written with upper case letters only; lower case
//! schemes are accepted because the pattern is compiled with REG_ICASE.
static constexpr const char *URI_PATTERN = "(([A-Z][A-Z0-9+.-]*):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
//! \brief compiled URI pattern (case insensitive, extended syntax)
// NOTE(review): uri::set_value calls match() and then reads the groups from
// this same file-level object, so the match results presumably live inside
// it - confirm before calling set_value from more than one thread.
static mobius::regex URI_REGEX (URI_PATTERN, REG_ICASE | REG_EXTENDED);

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief encode a string using %XX escape sequences
//! \param s plain (decoded) string
//! \param allowed chars that are copied verbatim (not encoded)
//! \return encoded string, with upper case hexadecimal digits
//! \see RFC 3986 - section 2.1
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
static std::string
encode_triplets (const std::string& s, const std::string& allowed)
{
  static constexpr const char *HEX_DIGITS = "0123456789ABCDEF";

  std::string tmp;
  tmp.reserve (s.length ());

  for (const char c : s)
    {
      if (allowed.find (c) != std::string::npos)
        tmp += c;

      else
        {
          // Work on the unsigned byte value. Where plain char is signed, a
          // byte >= 0x80 is negative and would be sign-extended when
          // formatted as an int, producing 8 hex digits instead of 2.
          const unsigned char byte = static_cast <unsigned char> (c);

          tmp += '%';
          tmp += HEX_DIGITS[byte >> 4];
          tmp += HEX_DIGITS[byte & 0x0f];
        }
    }

  return tmp;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief decode a string with %XX escape sequences
//! \param s encoded string
//! \return decoded string
//! \see RFC 3986 - section 2.1
//!
//! A '%' that is not followed by exactly two hexadecimal digits (truncated
//! or malformed escape, such as "100%" or "%zz") is copied literally
//! instead of raising an exception.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
static std::string
decode_triplets (const std::string& s)
{
  // value of a hexadecimal digit, or -1 if c is not a hexadecimal digit
  const auto hex_value = [] (char c) -> int
  {
    if (c >= '0' && c <= '9')
      return c - '0';

    if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;

    if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;

    return -1;
  };

  const std::string::size_type length = s.length ();
  std::string::size_type begin = 0;
  std::string::size_type pos = s.find ('%');
  std::string tmp;
  tmp.reserve (length);

  while (pos != std::string::npos)
    {
      // copy everything between the previous escape and this '%'
      tmp.append (s, begin, pos - begin);

      // both digits must exist and be hexadecimal
      const bool has_two_chars = pos + 2 < length;
      const int hi = has_two_chars ? hex_value (s[pos + 1]) : -1;
      const int lo = has_two_chars ? hex_value (s[pos + 2]) : -1;

      if (hi >= 0 && lo >= 0)
        {
          tmp += static_cast <char> ((hi << 4) | lo);
          begin = pos + 3;
        }
      else
        {
          tmp += '%';
          begin = pos + 1;
        }

      pos = s.find ('%', begin);
    }

  // remaining chars after the last escape
  tmp.append (s, begin, std::string::npos);

  return tmp;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief merge the path of a relative reference with the path of a base URI
//! \param base base URI
//! \param rel relative URI
//! \return merged path (dot segments are not removed here)
//! \see RFC 3986 - section 5.2.3
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
static std::string
merge_paths (const uri& base, const uri& rel)
{
  // base has an authority but no path: the result is "/" + relative path
  if (!base.get_authority ().empty () && base.get_path ().empty ())
    return '/' + rel.get_path ();

  const std::string::size_type last_slash = base.get_path ().rfind ('/');

  // base path has no '/' at all: it is discarded entirely
  if (last_slash == std::string::npos)
    return rel.get_path ();

  // keep the base path up to and including its right-most '/'
  return base.get_path ().substr (0, last_slash + 1) + rel.get_path ();
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief remove "." and ".." segments from a path
//! \param path URI path
//! \return path with dot segments resolved
//! \see RFC 3986 - section 5.2.4
//!
//! The steps are labelled A to E as in the RFC. Each kept segment is stored
//! with its leading '/', so the output is the plain concatenation of them.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
static std::string
remove_dot_segments (const std::string& path)
{
  std::string pending (path);           // input buffer still to be processed
  std::vector <std::string> kept;       // output segments, in order

  while (!pending.empty ())
    {
      // A. leading "../" or "./": drop the prefix
      if (mobius::string::startswith (pending, "../"))
        pending.erase (0, 3);

      else if (mobius::string::startswith (pending, "./"))
        pending.erase (0, 2);

      // B. "/./" prefix or final "/.": replace by "/"
      else if (mobius::string::startswith (pending, "/./"))
        pending.erase (0, 2);

      else if (pending == "/.")
        pending = "/";

      // C. "/../" prefix or final "/..": replace by "/" and drop the last
      // output segment, if any
      else if (mobius::string::startswith (pending, "/../"))
        {
          pending.erase (0, 3);

          if (!kept.empty ())
            kept.pop_back ();
        }

      else if (pending == "/..")
        {
          pending = "/";

          if (!kept.empty ())
            kept.pop_back ();
        }

      // D. input is only "." or "..": discard it
      else if (pending == "." || pending == "..")
        pending.clear ();

      // E. move the first segment (with its leading '/', if any, and up to
      // but excluding the next '/') to the output
      else
        {
          const std::string::size_type search_from = (pending[0] == '/') ? 1 : 0;
          const std::string::size_type segment_end = pending.find ('/', search_from);

          // when there is no further '/', npos selects the rest of the buffer
          kept.push_back (pending.substr (0, segment_end));
          pending.erase (0, segment_end);
        }
    }

  std::string result;

  for (const std::string& part : kept)
    result += part;

  return result;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief create URI object from its string representation
//! \param value URI as string, parsed through set_value
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
uri::uri (const std::string& value)
{
  this->set_value (value);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief recompose the URI string (value_) from the component parts
//! \see RFC 3986 - section 5.3
//!
//! Each part is percent-encoded with its own set of allowed chars. The "//"
//! authority marker is written when the authority is not empty and also,
//! even with an empty authority, when the scheme is "file".
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
void
uri::normalize () const
{
  value_.clear ();

  // scheme ":"
  if (!scheme_.empty ())
    value_ += encode_triplets (scheme_, ALLOWED_SCHEME) + ':';

  // "//" authority
  const std::string authority = get_authority ();
  const bool is_file_scheme = (scheme_ == "file");

  if (is_file_scheme || !authority.empty ())
    value_ += "//" + encode_triplets (authority, ALLOWED_AUTHORITY);

  // path (always present, may be empty)
  value_ += encode_triplets (path_, ALLOWED_PATH);

  // "?" query
  if (!query_.empty ())
    value_ += "?" + encode_triplets (query_, ALLOWED_QUERY);

  // "#" fragment
  if (!fragment_.empty ())
    value_ += "#" + encode_triplets (fragment_, ALLOWED_FRAGMENT);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief parse URI into its parts
//! \param value URI
//! \see RFC 3986 - section 3
//!
//! Parsing must be done before decoding the percent-encoded triplets,
//! otherwise an encoded delimiter would be taken for a real one. The
//! parts are left untouched if the value does not match the URI pattern.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
void
uri::set_value (const std::string& value)
{
  value_ = value;

  if (URI_REGEX.match (value))
    {
      set_scheme (mobius::string::tolower (decode_triplets (URI_REGEX[2])));

      // The authority is handed over still encoded: set_authority splits it
      // at '@', ':' and '[' ']' and decodes each subpart by itself. Decoding
      // it here as well would decode twice ("%2541" -> "%41" -> "A") and
      // would turn an encoded "%40" or "%3A" into a delimiter.
      set_authority (URI_REGEX[4]);

      set_query (decode_triplets (URI_REGEX[7]));
      set_fragment (decode_triplets (URI_REGEX[9]));

      // if it is not a relative URI, remove dot segments - RFC 3986 - section 6.2.2.3
      std::string path = decode_triplets (URI_REGEX[5]);

      if ((!scheme_.empty () || !host_.empty ()) && !path.empty ())
        path = remove_dot_segments (path);

      set_path (path);
    }
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief compose the authority part from username, password, host and port
//! \return authority as [username[:password]@]host[:port], not encoded
//!
//! The password is written only when there is a username.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
uri::get_authority () const
{
  // [username[:password]@]
  std::string userinfo;

  if (!username_.empty ())
    {
      userinfo = username_;

      if (!password_.empty ())
        {
          userinfo += ':';
          userinfo += password_;
        }

      userinfo += '@';
    }

  // host[:port]
  std::string hostport;
  hostport += host_;

  if (!port_.empty ())
    {
      hostport += ':';
      hostport += port_;
    }

  return userinfo + hostport;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief set authority, splitting it into username, password, host and port
//! \param value authority as [username[:password]@]host[:port]
//!
//! The value is split at its delimiters first and each subpart is
//! percent-decoded afterwards. The host is also converted to lower case.
//! A host starting with '[' without the closing ']' gives empty host and port.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
void
uri::set_authority (const std::string& value)
{
  std::string username;
  std::string password;
  std::string host;
  std::string port;

  // [username[:password]@]
  std::string::size_type pos = value.find ('@');

  if (pos == std::string::npos)
    pos = 0;

  else
    {
      // Look for the password separator inside the userinfo only. A ':'
      // found after the '@' belongs to host:port (or to an IPv6 literal)
      // and must not be used to split username and password.
      const std::string userinfo = value.substr (0, pos);
      const std::string::size_type colon = userinfo.find (':');

      if (colon == std::string::npos)
        username = userinfo;

      else
        {
          username = userinfo.substr (0, colon);
          password = userinfo.substr (colon + 1);
        }

      pos++;
    }

  // host[:port]
  if (pos < value.length ())
    {
      if (value[pos] == '[')    // IPv6 | IPfuture
        {
          // search the closing bracket from the host start onwards
          const std::string::size_type close = value.find (']', pos);

          if (close != std::string::npos)
            {
              host = value.substr (pos, close - pos + 1);

              if (close + 1 < value.length () && value[close + 1] == ':')
                port = value.substr (close + 2);
            }

          // unmatched '[': host and port are left empty
        }

      else                      // IPv4 | hostname
        {
          const std::string::size_type colon = value.find (':', pos);

          if (colon == std::string::npos)
            host = value.substr (pos);

          else
            {
              host = value.substr (pos, colon - pos);
              port = value.substr (colon + 1);
            }
        }
    }

  // set values
  set_username (decode_triplets (username));
  set_password (decode_triplets (password));
  set_host (mobius::string::tolower (decode_triplets (host)));
  set_port (decode_triplets (port));
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief append a path to the current path
//! \param path path to be appended
//!
//! A '/' separator is inserted first, unless the current path already ends
//! with one. The object is flagged as modified.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
void
uri::join_path (const std::string& path)
{
  const bool ends_with_slash = !path_.empty () && path_.back () == '/';

  if (!ends_with_slash)
    path_ += '/';

  path_ += path;
  is_modified = true;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief get directory name from URI
//! \return directory name
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
uri::get_dirname () const
{
  std::string value;
  std::string path = encode_triplets (path_, ALLOWED_PATH);
  std::string::size_type pos = path.rfind ('/');

  if (pos != std::string::npos)
    {
      if (!scheme_.empty ())
        {
          value += encode_triplets (scheme_, ALLOWED_SCHEME);
          value += ':';
        }

      std::string authority = get_authority ();
      if (!authority.empty ())
        {
          value += "//";
          value += encode_triplets (authority, ALLOWED_AUTHORITY);
        }

      value += path.substr (0, pos);
    }

  return value;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief get base filename from URI
//! \return part of the path after its last '/', or an empty string if the
//! path has no '/'
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
uri::get_basename () const
{
  const std::string::size_type last_slash = path_.rfind ('/');

  if (last_slash == std::string::npos)
    return std::string ();

  return path_.substr (last_slash + 1);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief get filename extension from URI
//! \return extension (without the '.'), or an empty string if the last
//! segment of the path has no '.'
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
uri::get_extension () const
{
  std::string value;
  const std::string::size_type dot = path_.rfind ('.');

  if (dot != std::string::npos)
    {
      const std::string::size_type slash = path_.rfind ('/');

      // A '.' located before the last '/' belongs to a directory name
      // ("/dir.d/file"), not to the filename, so it is not an extension.
      if (slash == std::string::npos || slash < dot)
        value = path_.substr (dot + 1);
    }

  return value;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief resolve a URI reference against a base URI
//! \param base base URI (not relative URI)
//! \param rel relative URI
//! \return target URI, or a default constructed URI if base has no scheme
//! \see RFC 3986 - section 5.2
//!
//! A reference whose scheme is equal to the base scheme is handled as if it
//! had no scheme at all.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
uri
join (const uri& base, const uri& rel)
{
  uri target;

  // section 5.2.1 - "base" must be an URI
  if (base.get_scheme ().empty ())
    return target;

  // the fragment always comes from the reference
  target.set_fragment (rel.get_fragment ());

  const bool has_other_scheme = !rel.get_scheme ().empty () &&
                                rel.get_scheme () != base.get_scheme ();

  if (has_other_scheme || !rel.get_authority ().empty ())
    {
      // reference brings its own authority: only the scheme may be inherited
      target.set_scheme (has_other_scheme ? rel.get_scheme () : base.get_scheme ());
      target.set_authority (rel.get_authority ());
      target.set_path (remove_dot_segments (rel.get_path ()));
      target.set_query (rel.get_query ());
    }

  else if (rel.get_path ().empty ())
    {
      // empty reference path: keep base path, and base query unless the
      // reference has a query
      target.set_scheme (base.get_scheme ());
      target.set_authority (base.get_authority ());
      target.set_path (base.get_path ());
      target.set_query (rel.get_query ().empty () ? base.get_query () : rel.get_query ());
    }

  else
    {
      target.set_scheme (base.get_scheme ());
      target.set_authority (base.get_authority ());
      target.set_query (rel.get_query ());

      // absolute reference path replaces the base path, otherwise both
      // paths are merged
      const bool is_absolute_path = rel.get_path ()[0] == '/';

      if (is_absolute_path)
        target.set_path (remove_dot_segments (rel.get_path ()));

      else
        target.set_path (remove_dot_segments (merge_paths (base, rel)));
    }

  target.normalize ();

  return target;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief compare two URI objects for equality
//! \param lhs URI object
//! \param rhs URI object
//! \return true if both objects have the same value
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
bool
operator== (const uri& lhs, const uri& rhs)
{
  const auto& left = lhs.get_value ();
  const auto& right = rhs.get_value ();

  return left == right;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief order two URI objects by their values
//! \param lhs URI object
//! \param rhs URI object
//! \return true if the value of lhs is less than the value of rhs
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
bool
operator< (const uri& lhs, const uri& rhs)
{
  const auto& left = lhs.get_value ();
  const auto& right = rhs.get_value ();

  return left < right;
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief check whether two URI objects refer to the same document
//! \param lhs URI object
//! \param rhs URI object
//! \return true if scheme, authority, path and query are equal (the
//! fragment is not compared)
//! \see RFC 3986 - section 4.4
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
bool
is_same_document (const uri& lhs, const uri& rhs)
{
  if (lhs.get_scheme () != rhs.get_scheme ())
    return false;

  if (lhs.get_authority () != rhs.get_authority ())
    return false;

  if (lhs.get_path () != rhs.get_path ())
    return false;

  return lhs.get_query () == rhs.get_query ();
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief extract the path part of a URL
//! \param url URL
//! \return path, as given by uri::get_path for the parsed URL
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
get_path_from_url (const std::string& url)
{
  return mobius::uri (url).get_path ();
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief convert path into URL
//! \param path path
//! \return URL
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
get_url_from_path (const std::string& path)
{
  mobius::uri uri;
  uri.set_scheme ("file");
  uri.set_path (path);
  return uri.get_value ();
}

} // namespace mobius
