import PIL.Image as pimg
import numpy as np
import sklearn.linear_model as lm
import statistics as st
import pandas as pd


def buildImage(red, green, blue, show = True):
    
    data = np.dstack((red, green, blue))
    img = pimg.fromarray(np.uint8(np.array(data)))
    
    if (show):
        img.show()
        
    return img


def colorRegression (color1, color2, dependent):
    
    regr = lm.LinearRegression()
    
    independents = np.reshape(np.dstack(
        (color1, color2)),
        (len(color1) * len(color1[0]), 2
    ))
    regr.fit(independents, dependent.flatten())
    
    return [regr.coef_[0], regr.coef_[1], regr.intercept_]


def computeLayer(color1, color2, color1_cf, color2_cf, intercept):
    
    raw = color1 * color1_cf + color2 * color2_cf + intercept
    
    # Round float values and eliminate negatives
    return np.clip(np.around(raw).astype(int), 0, 1000)


def medianize(layer, max_error, fill_number = None):
    
    error = 0
    anchor = layer[0][0] # The first layer value
    queue = []
    layer_flat = layer.flatten()
    layer_len = len(layer_flat)
    result = [] * layer_len
    frequencies = [0] * 256 # frequencies[x] is the frequency of x in the layer

    for idx, i in enumerate(layer_flat):
        error = abs(int(i) - int(anchor))

        # If is the difference too big
        if (error > max_error or idx == layer_len - 1): 
    
            # True only for the last cycle run
            if (idx == layer_len - 1): 
                queue.append(int(i))
                
            anchor = i 
            
            # Median from the similar values
            median = round(st.median(queue)) 
            
            frequencies[median] += 1
            result.append(median)
            
            # Fill the result layer
            for m in range(len(queue) - 1):
                
                # Previously declared value or median
                if (fill_number == None):
                    
                    result.append(median)
                    frequencies[median] += 1
                    
                else:
                    
                    result.append(fill_number)
                    frequencies[fill_number] += 1
                
            queue = []

        queue.append(int(i))
    
    return [
        # Layer as the original-shaped 2D array
        np.uint8(np.array(result)).reshape(
            len(layer),
            len(layer[0])
        ),
        # Frequencies of the color values
        np.array(frequencies)
    ]


def createNode (left, right, number, frequency):
    
    return [left, right, number, frequency, ""]


def getLeft(node):
    
    return node[0]


def getRight(node):
    
    return node[1]


def getNumber(node):
    
    return node[2]


def getFrequency(node):
    
    return node[3]


def getCode(node):
    
    return node[4]


def setCode(node, code):
    
    node[4] = code
    return node


def createNodes(frequencies):
    
    nodes = []
    
    for f in range(len(frequencies)):
        
        # If the frequency is zero
        if (frequencies[f] == 0):
            continue
            
        nodes.append(createNode(None, None, f, frequencies[f]))
    
    return nodes


def getRarestNodes(nodes):
    
    # Sort nodes ascendingly by their frequency
    nodes = sorted(nodes, key = lambda n: getFrequency(n))
    
    return [nodes[0], nodes[1]]


def buildTree(frequencies):
    
    nodes = createNodes(frequencies)
    
    # While the nodes are not on their proper places
    while len(nodes) > 1:

        left, right = getRarestNodes(nodes)

        # Assign code to the nodes
        left = setCode(left, 0)
        right = setCode(right, 1)

        # Create the parent node for the nodes with the least frequencies
        node = createNode(
            left, 
            right, 
            getNumber(left) + getNumber(right), # Numbers have the role of alphabet sybmols, so they are strings 
            getFrequency(left) + getFrequency(right) # Sum of the both frequencies
        )

        # Nodes are copied to the proper place, so remove the old ones
        nodes.remove(left)
        nodes.remove(right)
        
        # Append the newly created node to the node array
        nodes.append(node)
        
    # Return the root node    
    return nodes[0]


def getDictionary(node, code_prefix = "", nodes = None):
     
    previous_nodes = nodes
    
    # This is typically true when calling the function from outside
    if (nodes == None):
        nodes = []
    
    # Merge new code with the previous part
    code = code_prefix + str(getCode(node))
    
    left_node = getLeft(node)
    right_node = getRight(node)
    
    # If left node exists
    if(left_node):
        
        # Get what is on the left and append it to the code
        left_nodes = getDictionary(left_node, code, nodes)
        
        if (left_nodes):
            nodes.append(left_nodes)

    # If right node exists
    if(right_node):
        
        # Get what is on the right and append it to the code
        right_nodes = getDictionary(right_node, code, nodes)
        
        if (right_nodes):
            nodes.append(right_nodes)
            
    # If there are no prevous nodes and the dictionary is done        
    if (previous_nodes == None):
        return dict(np.array(
                sorted(nodes, key = lambda n: int(n[0]))
        ))
    
    # If this node has no children
    if(not left_node and not right_node):
        return [getNumber(node), code]


def layerToBinary(layer, frequencies):
    
    # Build the Huffman tree and get the dictionary from it
    tree = buildTree(frequencies)
    dictionary = getDictionary(tree)
    
    # Translate using the dictionary, join to the string, pack to the chunks, translate to decimal
    return np.packbits(np.array(list("".join(
        np.vectorize(dictionary.get)(layer.astype(str))
        .flatten()
        .tolist()
    ))).astype(int))


def saveCompressed(name, binary):
    
    # Data is saved as a .small file
    file = open(name + ".small", "wb")
    file.write(bytearray(binary))
    file.close()


def saveRecipe(frequencies, width, height, red_regr_coefficient, green_regr_coefficient, regr_intercept):
    
    recipe = np.array([
        ["fq", frequencies],
        ["w", width],
        ["h", height],
        ["rc", red_regr_coefficient],
        ["gc", green_regr_coefficient],
        ["ic", regr_intercept]
    ], dtype = object)

    # The recipe is saved as a .small.npy file
    np.save(IMAGE + ".small", recipe)


def loadCompressed(name):
    
    recipe = dict(np.load(IMAGE + ".npy", allow_pickle = True))
    
    img_small = open(IMAGE, "rb")
    
    # Get binary from the compressed file
    binary = np.unpackbits(np.frombuffer(bytearray(img_small.read()), dtype=np.uint8))
    
    return [binary, recipe]


def translateBinary(binary, dictionary):
    
    buffer = ""
    layer = []

    for i in binary:
        buffer += str(i)

        # If the code exists in the dictionary
        if buffer in dictionary:
            layer.append(dictionary[buffer])
            buffer = ""
            
    return layer


def forwardFillLayer(layer, recipe):
    
    # Replace zeros by the previous value (median)
    replaced = pd.DataFrame(np.array(layer)).replace(to_replace = "0", method = "ffill")

    # Reshape the array accoring to the image width and height
    return np.array(replaced).flatten().astype(int).reshape(recipe["h"], recipe["w"])


# ---------------- SETTINGS ----------------

# Image to compress
IMAGE = "lenna.png"

# Value which determining maximal difference of the layer values to be considered as the same
# High value means poor quality, but small size of the compressed file and vice versa
MAX_ERROR = 10

# ------------------------------------------

# Get the image and split into layers
img = np.array(pimg.open(IMAGE))
red = img[:,:,0]
green = img[:,:,1]
blue = img[:,:,2]

# Get the regression coefficients and the intercept
red_cf, green_cf, intercept = colorRegression(red, green, blue)

# Replace similar values and get the frequency arrays
red_med, red_freq = medianize(red, MAX_ERROR, 0)
green_med, green_freq = medianize(green, MAX_ERROR, 0)

# Compress the layers using the Huffman algorithm
binary = layerToBinary(
    np.array([red_med, green_med]),
    red_freq + green_freq
)

saveCompressed(IMAGE, binary)
saveRecipe(red_freq + green_freq, len(red[0]), len(red), red_cf, green_cf, intercept)

# Count sizes and its ratio
uncomp_size = (len(red) * len(red[0]) * 3) / 1000
comp_size = len(binary) / 1000
ratio = round((comp_size / uncomp_size) * 100, 2)

print("Uncompressed size: " + str(uncomp_size) + " kilobytes")
print("Compressed size: " + str(comp_size) + " kilobytes (" + str(ratio) + " %)")

Uncompressed size: 786.432 kilobytes
Compressed size: 184.769 kilobytes (23.49 %)


# ---------------- SETTINGS ----------------

# Image to decompress
IMAGE = "lenna.png.small"

# ------------------------------------------

binary, recipe = loadCompressed(IMAGE)

tree = buildTree(recipe["fq"])
dictionary = getDictionary(tree)

layer = translateBinary(
    binary,
    dict(zip(dictionary.values(), dictionary.keys())) # Reversed dictionary
)

image_size = recipe["w"] * recipe["h"]

red = forwardFillLayer(layer[: image_size], recipe)
green = forwardFillLayer(layer[image_size : 2 * image_size], recipe)

blue_regr = computeLayer(red, green, recipe["rc"], recipe["gc"], recipe["ic"])

buildImage(red, green, blue_regr, False)

The Final Compression Project – Compressão e Codificação de Dados¶

Brief description¶

Imports¶

Function to create a image from the three separate layers¶

Regression functions¶

Function to count regression coefficients¶

Function to compute color values of a layer¶

Function to replace similar values by their median¶

Functions to handle Huffman tree nodes¶

Function to create the mode¶

Functions to get and set node properties¶

Function to create a list of nodes¶

Function to get two tree nodes with the least frequency (the least probability of observing)¶

Function to build the Huffman tree¶

Function to read the Huffman tree and create a dictionary from it¶

Function to get the binary from the color layer¶

Function to save binary to a file¶

Function to save a recipe to a file¶

Function to load the compressed file and the recipe¶

Function to translate binary accoring to the dictionary¶

Function to replace zeros by the previous value¶

Compression¶

Decompression¶