Utilisateur:Jona/generate table.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Format the pickle files dictOcc, dictOcc.old, listOcc and listOcc.old into a wikitable whose cells are colour-coded according to their counts

import sys, re, getopt
import pickle
from string import strip


def compute_occ_subdict(extendedl,subdictTemplate={}):
    """Count occurrences, grouped into one sub-dictionary per outer key.

    Each element ``e`` of ``extendedl`` is indexed as ``e[1]`` (the outer
    key) and ``e[0][0]`` (the key counted inside that outer key's
    sub-dictionary). A new sub-dictionary starts as a shallow copy of
    ``subdictTemplate``, so the template itself is never modified.

    Return a dict mapping every outer key to its dict of counts."""
    counts = {}
    for entry in extendedl:
        outer_key = entry[1]
        if outer_key not in counts:
            # Copy the template so each outer key owns an independent dict.
            counts[outer_key] = dict(subdictTemplate)
        bucket = counts[outer_key]
        inner_key = entry[0][0]
        bucket[inner_key] = bucket.get(inner_key, 0) + 1
    return counts

def file_to_list(nameFile):
    """Read the text file ``nameFile`` and return its lines as a list.

    The newline character ending each line is removed; an empty line
    becomes an empty string. The file is closed even if reading fails."""
    with open(nameFile, 'r') as f:
        return [line.rstrip('\n') for line in f]

def list_to_file(l,nameFile):
    """Write the items of ``l`` to the file ``nameFile``, one per line.

    A tuple is written as its first two elements separated by a tab, a
    string is written unchanged. Any other item is skipped after printing
    "This format is not supported".

    If ``l`` is not a list a warning is printed and the function still
    tries to iterate over it. The file is closed even if writing fails."""
    if not isinstance(l, list):
        # Single-argument print(...) behaves the same as the print statement.
        print("Warning: The argument is not a list (list_to_file())")
        print("Unexpected behavior can occur")
    with open(nameFile, 'w') as f:
        for s in l:
            if isinstance(s, tuple):
                f.write(str(s[0])+"\t"+str(s[1])+'\n')
            elif isinstance(s, str):
                f.write(s+'\n')
            else:
                print("This format is not supported")

def dict_to_file(d,nameFile):
    """Write every ``key value`` pair of ``d`` to the file ``nameFile``.

    One pair is written per line, key and value separated by a single
    space. Keys and values are converted with ``%s`` formatting, so
    non-string values such as integer counts are accepted.

    If ``d`` is not a dict a warning is printed and the function still
    tries to iterate over it. The file is closed even if writing fails."""
    if not isinstance(d, dict):
        # Single-argument print(...) behaves the same as the print statement.
        print("Warning: The argument is not a dict (dict_to_file())")
        print("Unexpected behavior can occur")
    with open(nameFile, 'w') as f:
        for key in d:
            # Explicit 2-tuple so a tuple key is formatted, not unpacked.
            f.write('%s %s\n' % (key, d[key]))

def check_mapping(p):
    """Return the wikitable cell-attribute prefix matching the count ``p``.

    The prefix of the highest threshold that ``p`` reaches is returned.
    Values below every threshold, including negative ones, get an empty
    prefix, so the result is always a string that can be written to a file."""
    # (threshold, prefix) pairs, highest threshold first. Add new levels here.
    mapping = (
        (50000, "bgcolor='#000000' style='color:white;' |"),
        (20000, "bgcolor='#20FF20'|"),
        (5000, "bgcolor='#6EFF6E'|"),
        (1000, "bgcolor='#F5FFF5'|"),
    )
    for check, value in mapping:
        if p >= check:
            return value
    # Nothing matched: no decoration (previously None for negative values).
    return ""

def check_mapping_diff(p):
    """Return the wikitable cell-attribute prefix for the difference ``p``.

    Negative differences get a red background, a zero difference a grey
    one. Positive differences get the prefix of the highest threshold they
    reach; every positive prefix ends with a ``+`` sign."""
    if p < 0:
        return "bgcolor='red'|"
    if p == 0:
        return "bgcolor='grey'|"
    # (threshold, prefix) pairs, highest threshold first. Add new levels here.
    levels = (
        (5000, "bgcolor='#000000' style='color:white;' |+"),
        (2000, "bgcolor='#20FF20'|+"),
        (500, "bgcolor='#6EFF6E'|+"),
        (100, "bgcolor='#F5FFF5'|+"),
        (0, "+"),
    )
    return next((prefix for floor, prefix in levels if p >= floor), None)

def write_item(it,f):
    """Write one table cell for the count ``it`` to the file object ``f``.

    The cell is the prefix chosen by check_mapping() followed by a space
    and the value itself."""
    cell_prefix = check_mapping(it)
    f.write(cell_prefix)
    cell_value = " " + str(it)
    f.write(cell_value)

def write_item_diff(it,f):
    """Write one table cell for the difference ``it`` to the file object ``f``.

    The cell is the prefix chosen by check_mapping_diff() followed by a
    space and the value itself."""
    cell_prefix = check_mapping_diff(it)
    f.write(cell_prefix)
    cell_value = " " + str(it)
    f.write(cell_value)

def write_line(dict,f,lineNumber):
    """Write one wikitable row describing a language to the file object ``f``.

    ``dict`` must provide the keys "lang", "total", "oldTotal", "nom",
    "nom-pr", "adj", "verb", "adv", "flex", "loc" and "stub".
    ``lineNumber`` is written in the first cell of the row.

    Cells, in order: line number, language link, total, old total,
    difference (total - oldTotal), nom, nom-pr, adj, verb, adv,
    total - flex, flex, loc, stub."""
    row = dict  # local alias; the parameter name hides the builtin
    separator = ' ||'

    lang_template = '{{nom langue|' + row["lang"] + '}}'
    f.write('|' + str(lineNumber) + '||bgcolor="#EEEEEE" |[[:Catégorie:'
            + lang_template + '|' + lang_template + ']]||')

    write_item(row["total"], f)
    f.write(separator)
    write_item(row["oldTotal"], f)
    f.write(separator)
    write_item_diff(row["total"] - row["oldTotal"], f)

    for key in ("nom", "nom-pr", "adj", "verb", "adv"):
        f.write(separator)
        write_item(row[key], f)

    # Total without the "flex" entries.
    f.write(separator)
    write_item(row["total"] - row["flex"], f)

    for key in ("flex", "loc", "stub"):
        f.write(separator)
        write_item(row[key], f)

    f.write('\n')

def format_and_write(l,outf):
    """Write a list of ``(number, lang)`` tuples as two-column wikitable rows.

    Each tuple produces a ``|-`` row separator followed by a
    ``| lang || number`` line in the file ``outf``. The number is formatted
    with ``%d``. The file is closed even if formatting a row fails."""
    with open(outf, 'w') as fFinal:
        for line in l:
            fFinal.write('|-\n')
            fFinal.write('| %(lang)s || %(number)d\n' % {'lang': line[1],'number': line[0]})

def format_and_write_more(d,l,outf):
    """Write a detailed wikitable row for every language that has type counts.

    ``l`` is a list of ``(total, lang)`` tuples and ``d`` maps a language
    to its dictionary of per-type counts. A language missing from ``d`` is
    reported on standard output and skipped.

    Rows are written by write_line(), which needs a line number and an
    "oldTotal" entry. The line number is the 1-based position of the
    language in ``l``. No previous totals are available here, so
    "oldTotal" defaults to 0 unless ``d[lang]`` provides one.

    The file ``outf`` is closed even if writing a row fails."""
    with open(outf, 'w') as fFinal:
        for lineNumber, line in enumerate(l, 1):
            lang = line[1]
            if lang not in d:
                # Two spaces keep the output of the former print statement.
                print('no dictionary for  %s' % (lang,))
                continue
            dictFull = {'lang': lang, 'total': line[0], 'oldTotal': 0}
            dictFull.update(d[lang])
            fFinal.write('|-\n')
            write_line(dictFull, fFinal, lineNumber)

def format_and_write_both(d,l,oldd,oldl,outf):
    """Write the full wikitable (current and previous totals) to ``outf``.

    ``l`` is a list of ``(total, lang)`` tuples; one row is written per
    tuple, numbered from 1 in list order. ``d`` maps a language to its
    dictionary of per-type counts and ``oldl`` maps a language to its
    previous total (0 when the language is absent). ``oldd`` is accepted
    but not used.

    Any per-type count that is not supplied for a language is written
    as 0. The file is closed even if writing a row fails."""
    # Every per-type column read by write_line(); used as defaults.
    zeroCounts = {'nom':0,'nom-pr':0,'adj':0,'verb':0,'adv':0,'flex':0,'loc':0, 'stub':0}
    with open(outf, 'w') as fFinal:
        for lineNumber, line in enumerate(l, 1):
            lang = line[1]
            dictFull = dict(zeroCounts)
            dictFull.update({'lang': lang, 'total': line[0], 'oldTotal': oldl.get(lang, 0)})
            if lang in d:
                # Merge the per-type counts into the row dictionary.
                dictFull.update(d[lang])
            fFinal.write('|-\n')
            write_line(dictFull, fFinal, lineNumber)

def usage():
    """Write the summary of command-line options to standard error."""
    option_rows = (
        "Options available are\n",
        "-h --help       ",
        "-v --verbose    Enter verbose mode",
        "-i --input      Specify an input directory",
        "-o --output     Specify an output filename",
        "-d              (nothing changing)\n",
    )
    sys.stderr.write("\n".join(option_rows))


def _load_pickle(path):
    """Unpickle and return the object stored in the file ``path``."""
    # NOTE: pickle.load() can execute arbitrary code; only read trusted files.
    # NOTE(review): binary mode assumes the pickles were written in binary
    # mode, or on a platform where text and binary files are identical.
    with open(path, 'rb') as fpickle:
        return pickle.load(fpickle)


def main(argv):
    """Build the colour-coded wikitable from the pickled counts.

    ``argv`` is the list of command-line arguments without the program
    name. Recognised options:

    -h, --help      print the usage text and exit with status 0
    -v, --verbose   report the output filename once it is written
    -d              set the module-level ``_debug`` flag
    -i, --input     directory holding dictOcc, listOcc, dictOcc.old and
                    listOcc.old (default: the current directory)
    -o, --output    output filename (default: 'langsTableCol')

    An unknown option prints an error and the usage text, then exits with
    status 2. Totals are printed on standard output and the table itself
    is written by format_and_write_both()."""
    import os  # local import: only main() needs to build paths

    global _verbose
    global _debug
    _verbose = 0
    _debug = 0

    outf = 'langsTableCol'
    inputdir = os.curdir

    try:
        opts, args = getopt.getopt(argv, "hvi:o:d", ["help", "verbose", "input=", "output="])
    except getopt.GetoptError:
        sys.stderr.write("Illegal argument\n")
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt == '-d':
            _debug = 1
            #TODO: put debug condition
        elif opt in ("-v", "--verbose"):
            _verbose = 1
            #TODO: put verbose condition
        elif opt in ("-o", "--output"):
            #TODO: verify that arg is a valid output path
            outf = arg
        elif opt in ("-i", "--input"):
            #TODO: verify that arg is a directory path
            inputdir = arg

    # The input directory was previously parsed but never used.
    typeOcc = _load_pickle(os.path.join(inputdir, 'dictOcc'))
    langsOcc = _load_pickle(os.path.join(inputdir, 'listOcc'))
    typeOccOld = _load_pickle(os.path.join(inputdir, 'dictOcc.old'))
    langsOccOld = _load_pickle(os.path.join(inputdir, 'listOcc.old'))

    # (count, lang) tuples, largest count first.
    lLangsSorted = sorted(((count, lang) for lang, count in langsOcc.items()), reverse=True)

    # Sum of each type over all languages; a missing type counts as 0.
    sumPerType = {'nom':0,'nom-pr':0,'adj':0,'verb':0,'adv':0,'flex':0,'loc':0, 'stub':0}
    for langCounts in typeOcc.values():
        for key in sumPerType:
            sumPerType[key] += langCounts.get(key, 0)

    newTotal = sum(langsOcc.values())
    oldTotal = sum(langsOccOld.values())
    diff = newTotal - oldTotal

    # Single-argument print(...) keeps the output of the former print
    # statements, including their doubled spaces.
    print("Total : nom : {nom:d} , nom-pr: {nom-pr:d} , adj: {adj:d} , verb: {verb:d} , adv: {adv:d} , flex: {flex:d} , loc: {loc:d} , stub: {stub:d}".format(**sumPerType))
    print("New total :  {0}  Old total :  {1}  diff :  {2}".format(newTotal, oldTotal, diff))

    # Summary row, directly pastable in the wiktionary table.
    summaryCells = [
        newTotal, oldTotal, diff,
        sumPerType["nom"], sumPerType["nom-pr"], sumPerType["adj"],
        sumPerType["verb"], sumPerType["adv"],
        newTotal - sumPerType["flex"],
        sumPerType["flex"], sumPerType["loc"], sumPerType["stub"],
    ]
    print("\n| || bgcolor='yellow' | Total général || " + " || ".join(str(cell) for cell in summaryCells) + "\n")

    # Write the table itself.
    format_and_write_both(typeOcc,lLangsSorted,typeOccOld,langsOccOld,outf)
    if _verbose:
        print('Output written in "%s"' % outf)

# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    cli_args = sys.argv[1:]  # drop the program name
    main(cli_args)