# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Mobius Forensic Toolkit
# Copyright (C) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 Eduardo Aguiar
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
import re
import mobius
import libxml2
import string

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Constants
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Matches a <ss type="NAME"...>...</ss> emoticon element, up to the first
# following </ss>. Group 1 captures NAME (lowercase letters only). Used by
# parse () to substitute emoticons through emoji_replace ().
RE_SMILE = re.compile ('<ss type="([a-z]+)".*?</ss>')

# Emoticon type name (group 1 of RE_SMILE) -> Unicode code point of the emoji
# used to replace it in emoji_replace (). Names missing from this table are
# logged there and kept as <emoji>name</emoji>.
# Note: 'dull' and 'wasntme' both map to the same code point (0x1f644).
SMILE_CHARS = {
  'angel'       : 0x1f47c,
  'angry'       : 0x1f620,
  'blush'       : 0x1f633,
  'brokenheart' : 0x1f494,
  'cat'         : 0x1f408,
  'cool'        : 0x1f60e,
  'cry'         : 0x1f622,
  'cwl'         : 0x1f602,
  'dance'       : 0x1f57a,
  'devil'       : 0x1f608,
  'dull'        : 0x1f644,
  'giggle'      : 0x1f92d,
  'heart'       : 0x02764,
  'facepalm'    : 0x1f926,
  'kiss'        : 0x1f617,
  'hearteyes'   : 0x1f60d,
  'inlove'      : 0x1f970,
  'laugh'       : 0x1f603,
  'lips'        : 0x1f48b,
  'lipssealed'  : 0x1f910,
  'mmm'         : 0x1f60b,
  'nerd'        : 0x1f913,
  'praying'     : 0x1f64f,
  'puke'        : 0x1f92e,
  'rofl'        : 0x1f923,
  'sad'         : 0x1f627,
  'sleepy'      : 0x1f62a,
  'smile'       : 0x1f604,
  'speechless'  : 0x1f610,
  'stareyes'    : 0x1f929,
  'surprised'   : 0x1f632,
  'think'       : 0x1f914,
  'tongueout'   : 0x1f61b,
  'unamused'    : 0x1f612,
  'wasntme'     : 0x1f644,
  'wink'        : 0x1f609,
  'worry'       : 0x1f61f,
  'xd'          : 0x1f606
  }

# Characters accepted in a tag name by parse (). ASCII letters only:
# string.lowercase and string.uppercase are locale-dependent in Python 2
# (they change when locale.setlocale () is called) and do not exist in
# Python 3, while string.ascii_letters is a constant in both.
TAG_NAME_CHARS = set (string.ascii_letters)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse Skype message text
# @param text Message text, possibly containing <tag>...</tag> elements
# @return List of (item_type, data) tuples, in the order found in text
#
# Literal text is emitted through new_text_message () and each
# <tag>...</tag> element through parse_tag (). A '<' that does not start a
# complete element (no tag name, or no matching </tag>) is kept as literal
# text instead of being dropped.
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse (text):
  items = []

  # pre-format: replace <ss> emoticons before walking through the tags
  text = RE_SMILE.sub (emoji_replace, text)

  # walk through text
  length = len (text)
  literal_start = 0     # start of the literal text not yet emitted
  pos = 0               # position where the next '<' search begins

  while pos < length:
    start = text.find ('<', pos)

    # no more '<' chars: the remaining text is literal
    if start == -1:
      break

    # get tag name
    name_end = start + 1

    while name_end < length and text[name_end] in TAG_NAME_CHARS:
      name_end += 1

    tag = text[start + 1:name_end]

    # no tag name found: '<' is literal text, keep scanning after it
    if not tag:
      pos = start + 1
      continue

    # tag name found, search for end tag (</tag>)
    end = text.find ('</%s>' % tag, name_end)

    # no end tag: the remaining text, '<tag' included, is literal
    if end == -1:
      break

    # emit literal text preceding the element
    if literal_start < start:
      items.append (new_text_message (text[literal_start:start]))

    # skip the end tag itself: len ('</') + len (tag) + len ('>')
    end += len (tag) + 3
    items.append (parse_tag (tag, text[start:end]))
    literal_start = pos = end

  # emit trailing literal text, if any
  if literal_start < length:
    items.append (new_text_message (text[literal_start:]))

  return items

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse Skype message text tag
# @param tag Tag name, without angle brackets
# @param text Complete element text, from '<tag' up to '</tag>'
# @return (item_type, data) tuple. item_type is 'href', 'quote', 'system' or
#         'unknown'. data is a dict whose 'text' key holds the text to show
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_tag (tag, text):
  # default: unknown item carrying the raw element text
  item_type = 'unknown'
  data = {'text': text}

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <a>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  if tag == 'a':
    item_type = 'href'
    data = parse_a (text)

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <quote>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'quote':
    item_type = 'quote'
    data = parse_quote (text)

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <sms>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'sms':
    item_type = 'system'
    data = parse_sms (text)

    # parse_sms () only sets 'targets' when a <targets> child exists, so
    # fall back to an empty list instead of passing None to join ()
    targets = data.get ('targets') or []
    data['text'] = 'SMS message sent to %s: %s' % (', '.join (targets), data.get ('alt'))

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <files>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'files':
    item_type = 'system'
    filelist = parse_files (text)

    # with an empty file list, data keeps the raw element text
    if len (filelist) == 1:
      f = filelist[0]
      data['text'] = 'File "%s" sent (size: %d bytes)' % (f.get ('name'), f.get ('size'))

    elif len (filelist) > 1:
      data['text'] = 'Files %s sent' % ', '.join ('"%s"' % f.get ('name') for f in filelist)

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <partlist>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'partlist':
    item_type = 'system'
    data = parse_partlist (text)
    parts = [ (p.get ('account_id'), p.get ('account_name')) for p in data.get ('parts') ]

    # participants are listed sorted by account, with the name when available
    data['text'] = 'Participants: %s' % ', '.join ('%s (%s)' % (account, name) if name else account for (account, name) in sorted (parts))

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <URIObject>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'URIObject':
    item_type = 'system'
    data = parse_uriobject (text)

    # <OriginalName> takes precedence over the <meta> originalName attribute
    s_text = 'File "%s" shared' % (data.get ('name') or data.get ('meta_name'))

    if 'filesize' in data:
      s_text += ' (size: %d bytes)' % data.get ('filesize')

    s_text += '. URL: %s' % data.get ('uri')
    data['text'] = s_text

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <deletemember>
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'deletemember':
    item_type = 'system'
    data = parse_deletemember (text)
    data['text'] = 'Member %s deleted from chat by %s in %s' % (data.get ('target'), data.get ('initiator'), data.get ('timestamp'))

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # <emoji>
  # Placeholder written by emoji_replace () for emoticons it does not know.
  # It is returned as an 'unknown' item with the raw element text.
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  elif tag == 'emoji':
    pass

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # Unknown tag
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  else:
    mobius.core.log ('app.skype: Unknown tag <%s>' % tag)

  return item_type, data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <a> node
# @param text Complete <a>...</a> element text
# @return Dict with 'url' (href attribute) and 'text' (node content, with
#         trailing whitespace removed)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_a (text):
  doc = libxml2.parseDoc (text)
  root = doc.getRootElement ()

  # read everything needed before the document is released
  url = root.prop ('href')
  label = root.getContent ().rstrip ()
  doc.freeDoc ()

  return {'url': url, 'text': label}

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <quote> node
# @param text Complete <quote>...</quote> element text
# @return Dict with 'author_id', 'author_name', 'text' and, when the
#         timestamp attribute is set, 'timestamp'
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_quote (text):
  data = {}
  doc = libxml2.parseDoc (text)

  # libxml2 documents must be released explicitly: do it even if int () or
  # the timestamp conversion raises
  try:
    root = doc.getRootElement ()
    data['author_id'] = root.prop ('author')
    data['author_name'] = root.prop ('authorname')

    timestamp = root.prop ('timestamp')

    if timestamp:
      data['timestamp'] = mobius.datetime.new_datetime_from_unix_timestamp (int (timestamp))

    # quoted text is built from the direct text children only, child
    # elements are skipped
    chunks = []
    child = root.children

    while child:
      if child.type == 'text':
        chunks.append (child.getContent ())

      child = child.next

    data['text'] = ''.join (chunks)

  finally:
    doc.freeDoc ()

  return data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <sms> node
# @param text Complete <sms>...</sms> element text
# @return SMS message metadata: 'alt' attribute plus, when the corresponding
#         child element exists, 'targets', 'timestamp' and 'price'
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_sms (text):
  data = {}
  doc = libxml2.parseDoc (text)

  # libxml2 documents must be released explicitly: do it even if int () or
  # the timestamp conversion raises
  try:
    root = doc.getRootElement ()
    data['alt'] = root.prop ('alt')
    child = root.children

    while child:
      if child.type == 'element':

        if child.name == 'targets':
          data['targets'] = parse_sms_targets (child)

        elif child.name == 'sendtimestamp':
          data['timestamp'] = mobius.datetime.new_datetime_from_unix_timestamp (int (child.getContent ()))

        elif child.name == 'price':
          data['price'] = child.getContent ()

      child = child.next

  finally:
    doc.freeDoc ()

  return data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <targets> node
# @param node <targets> node
# @return List with the content of each <target> child element, in order
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_sms_targets (node):
  child = node.children
  found = []

  # walk through the sibling chain, keeping <target> elements only
  while child:
    is_target = child.type == 'element' and child.name == 'target'

    if is_target:
      found.append (child.getContent ())

    child = child.next

  return found

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <files> node
# @param text Complete <files>...</files> element text
# @return file list, one dict per <file> child with 'size' (int, 0 when the
#         attribute is missing), 'status', 'timestamp' (raw 'tid' attribute)
#         and 'name' (node content, with trailing whitespace removed)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_files (text):
  filelist = []
  doc = libxml2.parseDoc (text)

  # libxml2 documents must be released explicitly: do it even if int ()
  # raises on a malformed size attribute
  try:
    child = doc.getRootElement ().children

    while child:
      if child.type == 'element' and child.name == 'file':
        filelist.append ({
          'size' : int (child.prop ('size') or 0),
          'status' : child.prop ('status'),
          'timestamp' : child.prop ('tid'),
          'name' : child.getContent ().rstrip ()
          })

      child = child.next

  finally:
    doc.freeDoc ()

  return filelist

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <partlist> node
# @param text Complete <partlist>...</partlist> element text
# @return Participant list: dict with 'type' (type attribute) and 'parts'
#         (one parse_part () result per <part> child element)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_partlist (text):
  doc = libxml2.parseDoc (text)
  root = doc.getRootElement ()
  result = {'type': root.prop ('type')}

  # collect every <part> child element
  members = []
  child = root.children

  while child:
    if child.type == 'element' and child.name == 'part':
      members.append (parse_part (child))

    child = child.next

  doc.freeDoc ()
  result['parts'] = members

  return result

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <part> node
# @param node <part> node
# @return Dict with 'account_id' (identity attribute) and, when a <name>
#         child element exists, 'account_name' (its content)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_part (node):
  part = {'account_id': node.prop ('identity')}
  child = node.children

  # if there are several <name> children, the last one wins
  while child:
    is_name = child.type == 'element' and child.name == 'name'

    if is_name:
      part['account_name'] = child.getContent ()

    child = child.next

  return part

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <URIObject> node
# @param text Complete <URIObject>...</URIObject> element text
# @return Dict with the 'type', 'uri', 'url_thumbnail' and 'ams_id'
#         attributes plus, when the corresponding child element exists,
#         'filesize', 'name', 'meta_type' and 'meta_name'
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_uriobject (text):
  data = {}
  doc = libxml2.parseDoc (text)

  # libxml2 documents must be released explicitly: do it even if int ()
  # raises on a malformed <FileSize> value
  try:
    root = doc.getRootElement ()

    for attr in ('type', 'uri', 'url_thumbnail', 'ams_id'):
      data[attr] = root.prop (attr)

    child = root.children

    while child:
      if child.type == 'element':

        if child.name == 'FileSize':
          data['filesize'] = int (child.prop ('v') or 0)

        elif child.name == 'OriginalName':
          data['name'] = child.prop ('v')

        elif child.name == 'meta':
          data['meta_type'] = child.prop ('type')
          data['meta_name'] = child.prop ('originalName')

      child = child.next

  finally:
    doc.freeDoc ()

  return data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse <deletemember> node
# @param text Complete <deletemember>...</deletemember> element text
# @return Dict with, when the corresponding child element exists,
#         'timestamp', 'initiator' and 'target'
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_deletemember (text):
  data = {}
  doc = libxml2.parseDoc (text)

  # libxml2 documents must be released explicitly: do it even if int () or
  # the timestamp conversion raises
  try:
    child = doc.getRootElement ().children

    while child:
      if child.type == 'element':

        if child.name == 'eventtime':
          # value is divided by 1000 before being used as a unix timestamp
          # (presumably it is stored in milliseconds)
          timestamp = int (child.getContent ().strip ()) // 1000
          data['timestamp'] = mobius.datetime.new_datetime_from_unix_timestamp (timestamp)

        elif child.name in ('initiator', 'target'):
          # drop the prefix up to the first ':'. [-1] keeps the whole value
          # when there is no ':', where [1] would raise IndexError
          data[child.name] = child.getContent ().strip ().split (':', 1)[-1]

      child = child.next

  finally:
    doc.freeDoc ()

  return data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Emoji replacement function
# @param matchobj RE_SMILE match object (group 1 is the emoticon type name)
# @return UTF-8 encoded emoji for known emoticons, <emoji>name</emoji>
#         otherwise
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def emoji_replace (matchobj):
  name = matchobj.group (1)
  codepoint = SMILE_CHARS.get (name)

  # unknown emoticon: log it and keep its name inside an <emoji> element
  if not codepoint:
    mobius.core.log ('chat.skype: Unknown emoji "%s"' % name)
    return '<emoji>%s</emoji>' % name

  return unichr (codepoint).encode ('utf-8')

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Create new text message
# @param text Literal text, possibly containing XML predefined entities
# @return ('text', data) tuple, data['text'] being text with the entities
#         replaced by their characters
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def new_text_message (text):
  # '&amp;' is replaced last, so that an escaped entity such as '&amp;lt;'
  # becomes '&lt;' and is not decoded twice
  replacements = (
    ('&apos;', "'"),
    ('&quot;', '"'),
    ('&lt;', '<'),
    ('&gt;', '>'),
    ('&amp;', '&'),
  )

  for entity, char in replacements:
    text = text.replace (entity, char)

  return 'text', {'text': text}
