This algorithm consists in the creation of a binary tree in which each of the leaves contain a character string to compress with the frequency whit which it is repeated in the character string.
Based on the assignment of codes to different bit length of each character composing a text string. If short codes are assigned to the characters that appear more often, you get a greater understanding of the text string.
Process
I give the input character string, in this case use the next line of text for example:
Laptop nueva
1. Characters ordered from lowest to highest according to the number of times they are repeated.
2. We create a new tree root that contain the character with their respective number of frequency in the chain and insert the tree in a tree list.
3. Each of the characters as a tree is created and added to a list, each tree has its associated symbol and frequency number.
4. From the above it will select the first two trees from the list and will form a new tree with them:
5. The root of the new tree will be the sum of the frequencies of the roots of two trees to be joined and tree are now child nodes
6. It adds to the list the new tree created by ordering from lowest to highest
7. It gets to select two trees from the list and will form a new tree with them, before the tree realiar delete the two trees of the list, until a single tree in the list.
Not operate properly but here is some code that would make these processes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Tkinter import * | |
from sys import argv | |
import math | |
import sys, string | |
cam = list() | |
codigos = list() | |
frec = list() | |
simbolos = {} | |
arbol_fin ='' | |
class Principal: | |
#der=None | |
#izq=None | |
#padre=None | |
def __init__(self, padre, izq=None, der=None): | |
#self.der.append(None) | |
#self.izq.append(None) | |
#self.padre.append(None) | |
self.padre=padre | |
self.izq=izq | |
self.der=der | |
#arbol por ramas y frecuencias | |
def nodos(frec,texto): | |
global arbol_fin | |
bosque = list() | |
while ((len(frec) > 1)): | |
print "raiz", frec[0][1] + frec[1][1] | |
ramas = Principal(frec[0][1] + frec[1][1],frec[0][0],frec[1][0]) | |
print "nodo izquierdo",frec[0][0] | |
print "nodo derecho",frec[1][0] | |
frec.append((ramas,frec[0][1] + frec[1][1])) | |
del frec[0] | |
del frec[1] | |
frec=sorted(frec,key=lambda it: it[1]) | |
#raw_input() | |
arbol_ordenado(frec[0][0],'') | |
for i in texto: | |
arbol_fin+=simbolos[i] | |
return frec[0][0] | |
#ordenamos de menor a mayor | |
def ordenar(texto): | |
for i in texto: | |
if (i) not in codigos: | |
codigos.append(i) | |
cam.append((Principal(i),simbolos[i])) | |
frec=sorted(cam,key=lambda it: it[1]) | |
print 'final-prueba',frec | |
return frec | |
def arbol_ordenado(frec,cam): | |
if frec==None: | |
return cam[:-1] | |
simbolos[frec.padre]=arbol_fin=arbol_ordenado(frec.izq,cam=cam+'0') | |
simbolos[frec.padre]=arbol_fin=arbol_ordenado(frec.der,cam=cam+'1') | |
def juntos(texto): | |
for i in texto: | |
if i not in simbolos: | |
simbolos[i]=1 | |
else: | |
simbolos[i]=simbolos[i]+1 | |
return | |
def mostrar(palabra_1): | |
print palabra_1 | |
valor=palabra_1 | |
for i in arbol_final: | |
if i=='0': | |
valor=valor.izq | |
if valor.izq==None and valor.der==None: | |
palabra+=valor.padre | |
valor=palabra_1 | |
else: | |
valor=valor.der | |
if valor.izq==None and valor.der==None: | |
palabra+=valor.padre | |
valor=palabra_1 | |
def main(): | |
texto=raw_input('Cadena a utilizar: ') | |
juntos(texto) | |
frec=ordenar(texto) | |
palabra_1=nodos(frec,texto) | |
print arbol_final | |
mostrar(palabra_1) | |
if __name__ == "__main__": | |
main() |
Continued...
Link
Huffman
3+2. Necesitarías estar aquí para elegir un ejercicio por puntos extra. Vas mal.
ResponderEliminar