#!/usr/bin/env python # Author: Dave Hull # License: Public Domain # Version: 0.02 # Run this script from the command line as follows: # # python yahoo_msg_decoder.py # # This was a hack I wrote while working a forensics case several years # ago. It did the job that needed to be done, namely making Yahoo! IM # log files readable. I recently updated the script so it quits without # throwing an exception under normal circumstances. Yes, it used to # throw an exception every time it was run due to the lack of end of file # handling. Maybe in a few more years, I'll tackle some other items on the # To Do list. # # This script is now interactive, use -h to get usage. # # To Do: # Prevent output from going bold given certain byte strings. # Provide output options for HTML. import re, os, struct, argparse from time import gmtime, strftime parser = argparse.ArgumentParser(description='Process Yahoo! IM dat files.') parser.add_argument('--username', help='The Yahoo! IM username', dest='username', default='unknown') parser.add_argument('--other_party', default='other_party', help='other party to conversation', dest='other_party') parser.add_argument('--file', help='dat file to process', dest='filename') args = parser.parse_args() def getUserName(): username = args.username m = re.match('^[A-Za-z]{1}\w*\.?\w*$', username) if m is not None and 3 < len(username) < 33: return username else: print '''That's not a valid Yahoo Messenger ID. Try again.''' quit() def getFileName(): return args.filename def openFile(filename): filehandle = open(filename, 'rb') return filehandle def getHeader(filehandle): packedTime = filehandle.read(4) tz = filehandle.read(1) return tz def getMsgTime(filehandle): packedTime = filehandle.read(4) # time is always 4 bytes utcOffset = filehandle.read(1) # UTC offset follows time try: unpackedTime = struct.unpack("i", packedTime)[0] except struct.error: print "End of file" quit() return strftime("%Y-%m-%d %H:%M:%S UTC", gmtime(unpackedTime)) def getMsg(filehandle, readlength, username): msg = '' packedMsg = filehandle.read(readlength) mLength = str(readlength) + "s" unpackedMsg = struct.unpack(mLength, packedMsg)[0] msgLen = len(unpackedMsg) usernameLen = len(username) while msgLen > len(username): username += username for i in range(msgLen): msg += chr(ord(unpackedMsg[i]) ^ ord(username[i])) filehandle.read(1) # there's a null byte after the message return msg def getReservedBits(filehandle): filehandle.read(3) # reserved bits are always in 3s def getNBits(filehandle, n): filehandle.read(n) def getSent(filehandle): sent = filehandle.read(1) return sent def getMsgLength(filehandle): msgLength = filehandle.read(1) return ord(msgLength) username = getUserName() filename = getFileName() filehandle = openFile(filename) # Need to read header to determine how to parse file if getHeader(filehandle) == 0: getReservedBits(filehandle) getSent(filehandle) getNBits(filehandle, 11) else: filehandle.seek(0) while 1: msgTime = getMsgTime(filehandle) getReservedBits(filehandle) if ord(getSent(filehandle)): print args.other_party + " (" + msgTime + "):", else: print username + " (" + msgTime +"):", getReservedBits(filehandle) msgLen = getMsgLength(filehandle) getReservedBits(filehandle) print getMsg(filehandle, msgLen, username) getReservedBits(filehandle) filehandle.close()