#!/usr/bin/env python
#coding: utf-8
#
# cjoin2 マッチングセレクト（Open usp Tukubai版）
# 
# designed by Nobuaki Tounaka
# written by Ryuichi Ueda
#
# The MIT License
#
# Copyright (C) 2011 Universal Shell Programming Laboratory
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

_usage = "cjoin2 [+<string>] key=<n> <master> <tran>"
_version = "Fri Jul 27 21:03:24 JST 2012"
_code = "Open usp Tukubai (LINUX+FREEBSD/PYTHON2.4+, 3.1, 3.2/UTF-8)"
		# 1: 1st field
		# 3: 2nd and later fields with separator

import re
import os
import sys
import codecs

def die(msg):
	"""Report a fatal error on stderr and terminate the process.

	msg -- text appended to the "Error[cjoin2] : " prefix.

	Never returns: raises SystemExit with exit status 1.
	"""
	text = 'Error[cjoin2] : %s\n' % msg
	sys.stderr.write(text)
	raise SystemExit(1)

def usage():
	"""Write the usage, version and platform banner to stderr, then exit.

	The three lines are built from the module constants _usage, _version
	and _code.  Never returns: raises SystemExit with exit status 1.
	"""
	banner = (
		("Usage     :%s\n", _usage),
		("Version   :%s\n", _version),
		("	   %s\n", _code),
	)
	for template, value in banner:
		sys.stderr.write(template % value)
	raise SystemExit(1)


def openReadFile(file_name):
	"""Open an input source as a UTF-8 decoding text reader.

	file_name -- path of the file to read, or "-" for standard input.

	Returns a codecs stream reader.  If the named file cannot be opened,
	die() reports the error and the process exits with status 1.

	On Python 3 the binary buffer is detached from sys.stdin, so sys.stdin
	itself is unusable afterwards and "-" can be opened only once.
	"""
	if file_name != "-":
		# Only I/O failures mean "cannot open the file"; anything else
		# (KeyboardInterrupt, programming errors) must not be masked.
		try:    return codecs.open(file_name,'r','utf-8')
		except (IOError, OSError): die("ファイルを開けません。")

	if sys.version_info[0] < 3:
		# Python 2: sys.stdin is already a byte stream
		return codecs.getreader('utf-8')(sys.stdin)

	return codecs.getreader('utf-8')(sys.stdin.detach())

def resetStdout():
	"""Return a UTF-8 encoding writer for standard output.

	Python 2 wraps sys.stdout itself; Python 3 detaches the binary buffer
	from sys.stdout and wraps that.  The caller is expected to assign the
	result back to sys.stdout.
	"""
	make_writer = codecs.getwriter('utf-8')
	if sys.version_info[0] >= 3:
		return make_writer(sys.stdout.detach())

	return make_writer(sys.stdout)

def resetStderr():
	"""Return a UTF-8 encoding writer for standard error.

	Python 2 wraps sys.stderr itself; Python 3 detaches the binary buffer
	from sys.stderr and wraps that.  The caller is expected to assign the
	result back to sys.stderr.
	"""
	make_writer = codecs.getwriter('utf-8')
	if sys.version_info[0] >= 3:
		return make_writer(sys.stderr.detach())

	return make_writer(sys.stderr)

def getStringLength(s):
	"""Return the display width of s.

	The width is the sum of getCharLength() over every character of s;
	an empty string has width 0.
	"""
	total = 0
	for ch in s:
		total += getCharLength(ch)
	return total


def getCharLength(ch):
	"""Return the display width (1 or 2 columns) of a single character.

	Code points up to U+007F and the range U+FF61..U+FF9F count as one
	column; every other character counts as two.
	"""
	code_point = ord(ch)
	is_ascii = code_point <= 0x7f
	is_ff61_ff9f = 0xff61 <= code_point <= 0xff9f
	if is_ascii or is_ff61_ff9f:
		return 1

	return 2

def setMasterDummyFixed(master_file,key_num,dummy_string):
	"""Load the master and build a padding string from a fixed dummy token.

	master_file  -- iterable of text lines with space-separated fields.
	key_num      -- number of leading fields that form the key.
	dummy_string -- token repeated once per non-key field.

	Returns (master, dummy).  master maps the space-joined key fields to
	the space-joined remaining fields; when a key occurs more than once
	the last line wins.  dummy holds dummy_string repeated once for each
	field of the first value the dictionary yields.  An empty master gives
	an empty dummy, which is also what setMaster() produces in that case.
	"""
	master = {}

	for line in master_file:
		tokens = line.rstrip("\n").split(" ")

		key = " ".join(tokens[0:key_num])
		values = " ".join(tokens[key_num:])

		master[key] = values

	# Only one value is inspected to learn the field count.
	for v in master.values():
		num = len(v.split(" "))
		return master, " ".join([dummy_string] * num)

	# Empty master: there is no field to pad.  Falling off the end here
	# used to return None, which made the caller's unpacking fail.
	return master, ""

def setMaster(master_file,key_num):
	"""Load the master and build a "*" padding string sized to its fields.

	master_file -- iterable of text lines with space-separated fields.
	key_num     -- number of leading fields that form the key.

	Returns (master, dummy).  master maps the space-joined key fields to
	the space-joined remaining fields; when a key occurs more than once
	the last line wins.  dummy has one run of "*" per non-key field
	position, each as wide as the widest value seen at that position
	(width measured with getStringLength()), joined by single spaces.

	The number of positions is fixed by the first line that has non-key
	fields; extra fields on later lines are ignored.
	"""
	master = {}

	max_lengths = []
	for line in master_file:
		tokens = line.rstrip("\n").split(" ")
		fields = tokens[key_num:]

		master[" ".join(tokens[0:key_num])] = " ".join(fields)

		# Display width of each non-key field on this line
		lengths = [ getStringLength(e) for e in fields ]

		if len(max_lengths) == 0:
			max_lengths = lengths
		else:
			# zip() stops at the shorter list, so a line with fewer
			# fields than the first one (e.g. a blank line) no longer
			# raises IndexError.
			for i, (known, current) in enumerate(zip(max_lengths, lengths)):
				if known < current:
					max_lengths[i] = current

	# Build the dummy string
	tmp = [ "*" * e for e in max_lengths ]

	return master, " ".join(tmp)

def getDummyStr(master):
	"""Count the space-separated fields of the longest value in master.

	master -- dictionary whose values are strings.

	The count is computed but neither returned nor stored, so the function
	always returns None.
	NOTE(review): this looks unfinished and no caller is visible in this
	file -- confirm before relying on it.
	"""
	longest = ""
	for value in master.values():
		# Strictly longer only: on a tie the earlier value is kept
		if len(value) > len(longest):
			longest = value

	field_num = len(longest.split(" "))
	

# Main routine
if __name__ == '__main__':
	# Command line: cjoin2 [+<string> | -d<string>] key=<n>[/<m>] <master> [<tran>]
	# Every transaction line is written with the master's value fields
	# inserted after its key fields; lines whose key is not in the master
	# get the dummy padding instead.

	if len(sys.argv) < 3 :		usage()
	if sys.argv[1] == '--help' :	usage()
	if sys.argv[1] == '--version':	usage()

	# Switch stdout/stderr to UTF-8 writers
	sys.stdout = resetStdout()
	sys.stderr = resetStderr()

	# Work on a copy so that options can be consumed from the front
	args = sys.argv[1:]

	# Optional dummy token: "+<string>" or "-d<string>".
	# startswith() is safe for "" and "-", which plain indexing is not.
	dummy_string = None
	if args[0].startswith('+'):
		dummy_string = args.pop(0)[1:]
	elif args[0].startswith('-d'):
		dummy_string = args.pop(0)[2:]

	# Key option: "key=<n>" or "key=<from>/<to>" (1-based, inclusive)
	if not args or not args[0].startswith('key='):
		die("invalid key position")

	num_str = args.pop(0)[4:]
	try:
		if "/" in num_str:
			tmp = num_str.split("/")
			key_from = int(tmp[0]) - 1
			key_to = int(tmp[1])
		else:
			key_from = int(num_str) - 1
			key_to = key_from + 1
	except ValueError:
		# Non-numeric key position
		die("invalid key position")

	# The key must name at least one field, starting at field 1 or later
	if key_from < 0 or key_to <= key_from:
		die("invalid key position")

	# The master file is mandatory; the transaction defaults to stdin
	if not args:
		usage()
	master_name = args[0]
	if len(args) < 2: tran_name = "-"
	else:             tran_name = args[1]

	# Load the master into memory
	key_num = key_to - key_from
	if dummy_string is None:
		master,dummy = setMaster(openReadFile(master_name),key_num)
	else:
		master,dummy = setMasterDummyFixed(openReadFile(master_name),key_num,dummy_string)

	# Main loop: match each transaction line against the master
	for line in openReadFile(tran_name):
		tokens = line.rstrip("\n").split(" ")

		key = " ".join(tokens[key_from:key_to])
		prev = " ".join(tokens[0:key_to])
		end = " ".join(tokens[key_to:])

		# Master fields when the key matches, dummy padding otherwise.
		# rstrip drops the trailing separator left when nothing follows the key.
		print(" ".join([prev,master.get(key,dummy),end]).rstrip(' '))
