#!/usr/bin/python

# Drop rarely-used columns from "trn.dat" / "tst.dat" and renumber the rest.
#
# Every input line is split on single spaces: the first field is copied
# through unchanged and each following field is read as "column:value".
# Column usage is counted over "trn.dat" only; the resulting renumbering is
# then applied to both "trn.dat" -> "trn.new" and "tst.dat" -> "tst.new".

TRAIN_IN = "trn.dat"
TRAIN_OUT = "trn.new"
TEST_IN = "tst.dat"
TEST_OUT = "tst.new"

# A column is kept only if it occurs at least this many times in TRAIN_IN.
MIN_COUNT = 10


def parse_line(line):
    """Split one input line into its leading field and its column pairs.

    Returns a tuple ``(label, pairs)`` where ``label`` is the first
    space-separated field and ``pairs`` is a list of ``(column, value)``
    string tuples, in input order.

    Fields without a ":" are skipped.  That includes the empty fields
    produced by doubled or trailing spaces, which would otherwise end in an
    IndexError when the value part is looked up.  If a field holds more than
    one ":", only the first two parts are used.
    """
    fields = line.strip("\r\n").split(" ")
    pairs = []
    for token in fields[1:]:
        parts = token.split(":")
        if len(parts) >= 2:
            pairs.append((parts[0], parts[1]))
    return fields[0], pairs


def count_columns(lines):
    """Return a dict mapping each column key to its number of occurrences.

    ``lines`` is any iterable of input lines (an open file works).  Every
    well-formed "column:value" field counts once, so a column repeated
    within a single line is counted each time it appears.
    """
    counts = {}
    for line in lines:
        for column, _ in parse_line(line)[1]:
            counts[column] = counts.get(column, 0) + 1
    return counts


def build_index(counts, min_count=MIN_COUNT):
    """Assign new 1-based column numbers to the columns worth keeping.

    Columns whose count is below ``min_count`` are dropped.  The survivors
    are numbered in sorted order of their original (string) keys, so "10"
    sorts before "2".  The new numbers are returned as strings, ready to be
    written back out.
    """
    # NOTE(review): string ordering means the new numbers do not follow the
    # numeric order of the old keys -- confirm that no consumer of the .new
    # files relies on ascending column numbers within a line.
    index = {}
    for column in sorted(counts):
        if counts[column] >= min_count:
            index[column] = str(len(index) + 1)
    return index


def remap_line(line, index):
    """Return ``line`` rewritten with renumbered columns, ending in "\\n".

    Columns missing from ``index`` are dropped.  The leading field is always
    followed by a single space, even when no columns survive.
    """
    label, pairs = parse_line(line)
    kept = [index[column] + ":" + value
            for column, value in pairs if column in index]
    return label + " " + " ".join(kept) + "\n"


def rewrite_file(source_path, target_path, index):
    """Write the remapped form of every line of source_path to target_path."""
    with open(source_path, "r") as source, open(target_path, "w") as target:
        for line in source:
            target.write(remap_line(line, index))


def main():
    """Count columns in the training file, then rewrite both data files."""
    with open(TRAIN_IN, "r") as source:
        counts = count_columns(source)
    index = build_index(counts)

    # The test file reuses the index built from the training file so that
    # column numbers mean the same thing in both outputs.
    rewrite_file(TRAIN_IN, TRAIN_OUT, index)
    rewrite_file(TEST_IN, TEST_OUT, index)


if __name__ == "__main__":
    main()
