# -*- coding: utf-8 -*-
"""
Purpose
-------
To conveniently retrieve and manipulate files.

Classes
-------
None

Functions
---------
fetch_files(root_folder, file_name)
    Finds all files with a specific name within the root directory
    and its sub directories.   
sort_files_by_modtime(root_folder, file_name)
    Retrieves all files with a specific name and sorts them by when
    they were last modified (oldest to newest).
merge_csvs(src, dest, stack=[], delim=',')
    Merges two .csv files.
merge_npzs(src, dest, stack=[])
    Merges two .npz files.
merge_fldrs(src, dest, stack=[], omit_files=[], report=True)
    Merge the data in two files unless they are to be omitted.
merge_dir(src, dest, **kwargs)
    Merge two directories. Goes through all of the folders
    in the src and dest root directories and will merge them. Does 
    not merge files in the root.


Change log
----------     
10 Feb 21 - Separated these functions from a more general toolkit.

"""

from shutil import move, rmtree
from os import listdir, rename, walk
from os.path import exists, isdir, getmtime, splitext
import datetime

from numpy import (array, argsort, copy, genfromtxt, hstack,
                   load, savetxt, savez_compressed, zeros)

def fetch_files(root_folder, file_name):
    '''
    Finds all files with a specific name within the root directory
    and its sub directories.
    
    Parameters
    ----------
    root_folder : string
        The root folder that will have its subdirectories searched.
    file_name : string
        The name of the files that need to be found.

    Returns
    -------
    f_list : array(string)
        A list of the files - with their paths - that were found in 
        and above the root drectory.

    '''
    f_list = []

    # Goes through all of the directories and finds all of the files
    #  that match the file_name    
    for root, dirs, files in walk(root_folder):
        if file_name in files:
            f_list.append(root + '/' + file_name)
        
    return f_list

def sort_files_by_modtime(root_folder, file_name):
    '''
    Retrieves all files with a specific name and sorts them by when
    they were last modified (oldest to newest).

    Parameters
    ----------
    root_folder : string
        The root folder that will have its subdirectories searched.
    file_name : string
        The name of the files that need to be found.

    Returns
    -------
    f_list : np.array(string)
        The list of retrieved files in the order of when they were
        last modified.
    datetimes : list(string)
        Time stamps of each files last modification time.

    '''
    f_list = fetch_files(root_folder, file_name)
    
    times = []
    for f in f_list:
        times.append(getmtime(f))
    sorting = argsort(times)
    times = array(times)[sorting]
    f_list = array(f_list)[sorting]
    
    # make the times information easier to read
    datetimes = []
    for t in times:
        datetimes.append(datetime.datetime.fromtimestamp(t))
        
    return f_list, datetimes

def merge_csvs(src, dest, stack=[], delim=','):
    '''
    Merges two .csv files.

    Parameters
    ----------
    src : string
        The file to merge with the dest.
    dest : string
        The file to be merged with.
    stack : list(string)
        The columns that should be stacked (i.e,. adding the last value
        in the dest file to the values in the src file). Default is [].
    delim : string, optional
        The text file's delimiter. The default is ','.

    Returns
    -------
    None.

    '''
    # Loads both files as string data types because it is the
    #   safest format
    src_dat = genfromtxt(src, delimiter=delim, 
                           names=True, dtype=None, 
                           encoding='ascii'
                           )
    dest_dat = genfromtxt(dest, delimiter=delim, 
                            names=True, dtype=None, 
                            encoding='ascii'
                            )
    
    # Edits the source file so that it has the proper trial IDs
    for col in stack:
        if (col in dest_dat.dtype.names) and (col in src_dat.dtype.names):
            src_dat[col] += dest_dat[col][-1]
        else:
            raise ValueError('The column', col, 'does not exist in',
                             'one of the files.')
    
    # Combines the data 
    full_dat = hstack((dest_dat, src_dat))
    
    # Writes the data
    with open(dest, 'w') as fp:
        fp.write('# ' + ','.join(full_dat.dtype.names) + '\n')
        savetxt(fp,full_dat, '%s', ',')

def merge_npzs(src, dest, stack=[]):
    '''
    Merges two .npz files.

    Parameters
    ----------
    src : string
        The file to merge with the dest.
    dest : string
        The file to be merged with.
    stack : list(string)
        The columns that should be stacked (i.e,. adding the last value
        in the dest file to the values in the src file). Default is [].

    Returns
    -------
    None.

    '''
    # Load data
    src_dat = load(src)
    dest_dat = load(dest)
    
    empty = zeros(dest_dat.f.trial.size + src_dat.f.trial.size)
    new_dat = {}
    
    size = dest_dat.f.trial.size
    for key, values in dest_dat.items():
        new_dat.update({key : copy(empty)})
        new_dat[key][:size] = values
    for key, values in src_dat.items():
        if key in stack:
            values += new_dat[key].max()
        new_dat[key][size:] = values
    
    src_dat.close()
    dest_dat.close()
    savez_compressed(dest ,**new_dat)

def merge_fldrs(src, dest, 
                stack=[], omit_files=[], report=True):
    '''
    Merge the data in two files unless they are to be omitted.

    Parameters
    ----------
    src : string
        The folder to merge with the dest.
    dest : string
        The folder to be merged with.
    stack : list(string)
        The columns that should be stacked (i.e,. adding the last value
        in the dest file to the values in the src file). 
        The default is ['trial'].
    omit_files : list(string), optional
        Files that should be omitted from the merger. The default is [].
    report : bool, optional
        Whether the results of the merging should be reported. 
        The default is True.

    Returns
    -------
    None.

    '''
    # Go through each of the files
    for file in listdir(src):      
        # Check if the file should be merged
        if file not in omit_files:  
            name, pofix = splitext(file)
            
            # Merge the files
            if (pofix == '.txt') or (pofix == '.csv'):
                merge_csvs(src+file, dest+file, stack=stack)
            elif pofix == '.npz':
                merge_npzs(src+file, dest+file, stack=stack)
                
            if report: print("Merged", src+file, "with", dest+file)
        else:
            # Simply moves the file
            if not exists(dest + file):                            
                rename(src + file, dest + file)            
                if report: print("Moved", src + file, "to", dest + file)            
            else:
                # Creates a new name for the src file by adding a digit
                #    before the postfix
                counter = 2
                while True:
                    new_name = dest + file.replace('.', str(counter)+'.')
                    if not exists(new_name):
                        rename(src + file, new_name)            
                        if report: print("Renamed", src + file, "to", new_name)
                        break
                    else:
                        counter += 1
            
    # Delete the old folder and its contents
    rmtree(src)

def merge_dir(src, dest, **kwargs):
    '''
    Merge two directories. Will go through all of the folders
    in the two root directories and will merge them. Does not
    merge files in the root.

    Parameters
    ----------
    src : string
        The root folder to merge with the dest.
    dest : string
        The root folder to be merged with.
    kwargs <- may add kwargs for the function merge_fldrs.

    Returns
    -------
    None.

    '''
    # Gets the list of folders for both directories
    dest_fldrs = []
    src_fldrs = []
    for unknown in listdir(dest): 
        if isdir(dest + unknown + '/'):
            dest_fldrs.append(unknown + '/')
    for unknown in listdir(src): 
        if isdir(src + unknown + '/'): 
            src_fldrs.append(unknown + '/') 
    
    # Go through the second directory and move the files to the first directory
    for f in src_fldrs:
        # Ensure that the same folder exists in dir 1
        if f in dest_fldrs:                   
            merge_fldrs(src+f, dest+f, **kwargs)
        else:
            # If no identical folder, then move the folder to the new location
            move(src+f, dest+f)