aton.file

Description

Functions to move files around.

Index

save() Save a Python object to a compressed binary file, as .bin.gz
load() Load a Python object from a compressed binary file, as .bin.gz
get() Check that a file exists, and return the full path
get_list() Get a list of the files inside a folder, applying optional filters
get_dir() Get the full path of a folder or the cwd
remove() Remove file or folder
backup() Backup a file including the current timestamp in the name
rename_on_folder() Batch rename files from a folder
rename_on_folders() Batch rename files from subfolders
copy_to_folders() Copy files to individual subfolders

  1"""
  2# Description
  3
  4Functions to move files around.
  5
  6
  7# Index
  8
  9| | |
 10| --- | --- |
 11| `save()`              | Save a Python object to a compressed binary file, as `.bin.gz` |
 12| `load()`              | Load a Python object from a compressed binary file, as `.bin.gz` |
 13| `get()`               | Check that a file exists, and return the full path |
 14| `get_list()`          | Get a list of the files inside a folder, applying optional filters |
 15| `get_dir()`           | Get the full path of a folder or the cwd |
 16| `remove()`            | Remove file or folder |
 17| `backup()`            | Backup a file including the current timestamp in the name |
 18| `rename_on_folder()`  | Batch rename files from a folder |
 19| `rename_on_folders()` | Batch rename files from subfolders |
 20| `copy_to_folders()`   | Copy files to individual subfolders |
 21
 22---
 23"""
 24
 25
 26import os
 27import shutil
 28import pickle
 29import gzip
 30from datetime import datetime
 31
 32
 33def save(object, filename:str=None):
 34    """Save a Python object in the current working directory as a compressed binary file, using [pickle](https://docs.python.org/3/library/pickle.html)."""
 35    filename = 'data' if filename is None else filename
 36    if not filename.endswith('.bin.gz'):
 37        filename += '.bin.gz'
 38    file = os.path.join(os.getcwd(), filename)
 39    with gzip.open(file, 'wb') as f:
 40        pickle.dump(object, f)
 41    print(f"Data saved and compressed to {file}")
 42
 43
 44def load(filepath:str='data.bin.gz'):
 45    """Load a Python object from a compressed binary file, using [pickle](https://docs.python.org/3/library/pickle.html).
 46
 47    Use only if you trust the person who sent you the file!
 48    """
 49    file_path = get(filepath, return_anyway=True)
 50    if not file_path:
 51        file_path = get(filepath + '.bin.gz', return_anyway=True)
 52    if not file_path:
 53        raise FileNotFoundError(f"Missing file {filepath}")
 54    with gzip.open(file_path, 'rb') as f:
 55        data = pickle.load(f)
 56    return data
 57
 58
 59def get(
 60        filepath,
 61        include=None,
 62        exclude=None,
 63        return_anyway:bool=False,
 64        ) -> str:
 65    """Check if `filepath` exists, and returns its full path.
 66
 67    Raises an error if the file is not found,
 68    unless `return_anyway = True`, in which case it returns None.
 69    This can be used to personalize errors.
 70
 71    If the provided string is a directory, it checks the files inside it.
 72    if there is only one file inside, it returns said file;
 73    if there are more files, it tries to filter them with the `include` filters
 74    (string or list of strings) to return a single file.
 75    If this fails, try using more strict filters to return a single file.
 76    """
 77    if os.path.isfile(filepath):
 78        return os.path.abspath(filepath)
 79    elif os.path.isdir(filepath):
 80        files = get_list(folder=filepath, include=include, exclude=exclude, abspath=True)
 81    elif return_anyway:
 82        return None
 83    else:
 84        raise FileNotFoundError('Nothing found at ' + str(filepath))
 85    # Return a single file
 86    if len(files) == 1:
 87        return files[0]
 88    elif return_anyway:
 89        return None
 90    elif len(files) == 0:
 91        raise FileNotFoundError("The following directory is empty (maybe due to the 'include' filters):\n" + filepath)
 92    else:
 93        raise FileExistsError(f'More than one file found, please apply a more strict filter. Found:\n{files}')    
 94
 95
 96def get_list(
 97        folder:str=None,
 98        include=None,
 99        exclude=None,
100        abspath:bool=True,
101        also_folders:bool=False,
102        only_folders:bool=False,
103    ) -> list:
104    """Return the files inside a `folder`, applying optional filters.
105
106    Only filenames containing all strings in the `include` list will be returned.
107    Filenames containing any string from the `exclude` list will be ignored.
108
109    The full paths are returned by default; to get only the base names, set `abspath = False`.
110    The CWD folder is used by default if no `folder` is provided.
111
112    By default it only returns files, not folders.
113    It can optionally also/only returns folders,
114    with `also_folders` or `only_folders` set to `True`.
115    """
116    if not folder:
117        folder = os.getcwd()
118    if os.path.isfile(folder):
119        folder = os.path.dirname(folder)
120    if not os.path.isdir(folder):
121        raise FileNotFoundError('Directory not found: ' + folder)
122    folder = os.path.abspath(folder)
123    files = os.listdir(folder)
124    if not files:
125        return []
126    # Absolute paths?
127    if abspath:
128        files = [os.path.join(folder, f) for f in files]
129    # Should we keep only folders, also folders, or only files?
130    if only_folders:
131        files = [f for f in files if os.path.isdir(f)]
132    elif not also_folders:
133        files = [f for f in files if not os.path.isdir(f if abspath else os.path.join(folder, f))]
134    # Apply filters if provided
135    if include is not None:
136        # Ensure include filters is always a list
137        if not isinstance(include, list):
138            include = [str(include)]
139        # Normalize filter names
140        include = [os.path.basename(i) for i in include]
141        # Only keep files that contain all filters
142        files = [f for f in files if all(filter_str in os.path.basename(f) for filter_str in include)]
143    # Remove files containing any string from the exclude list
144    if exclude is not None:
145        # Ensure exclude filters is always a list
146        if not isinstance(exclude, list):
147            exclude = [str(exclude)]
148        # Normalize ignoring filter names
149        exclude = [os.path.basename(i) for i in exclude]
150        # Exclude the corresponding files
151        files = [f for f in files if not any(filter_str in os.path.basename(f) for filter_str in exclude)]
152    files.sort()
153    return files
154
155
def get_dir(folder=None) -> str:
    """Return the full path of `folder`, or of the parent folder if it's a file.

    If none is provided, the current working directory is returned.
    Raises a FileNotFoundError if `folder` is neither an existing
    folder nor an existing file.
    """
    if folder is None:
        return os.getcwd()
    if os.path.isdir(folder):
        return os.path.realpath(folder)
    # isfile must be called; the bare function object is always truthy
    if os.path.isfile(folder):
        return os.path.realpath(os.path.dirname(folder))
    raise FileNotFoundError(f'Missing folder at {folder}')
172
173
def remove(filepath:str) -> None:
    """Delete the file, or the whole folder tree, found at `filepath`.

    Nothing happens if `filepath` is None or does not exist.

    > WARNING: Removing stuff is always dangerous, be careful!
    """
    if filepath is not None:
        if os.path.isfile(filepath):
            os.remove(filepath)
        elif os.path.isdir(filepath):
            shutil.rmtree(filepath)
        # Anything else did not exist in the first place
    return None
188
189
def backup(
        filepath:str,
        keep:bool=True,
        label:str='backup',
        timestamp:str='%y%m%dT%H%M%S',
        ) -> str:
    """Backup a file including the current timestamp in the name.

    `filepath` is resolved with `get()`, which raises if no single file is found.
    Keeps the original file by default (copy), unless `keep = False` (move).
    Appends the `label`, '_backup' by default, and the current time formatted
    with `timestamp`, between the file name and its last extension.
    Either of them can be disabled with an empty string or None.
    Returns the new backup filepath.
    """
    filepath = get(filepath)
    # Build the suffix from the enabled parts only, so a None label cannot break the concatenation
    suffix = '_' + label if label else ''
    if timestamp:
        suffix += '_' + datetime.now().strftime(timestamp)
    dir_path, basename = os.path.split(filepath)
    name, ext = os.path.splitext(basename)
    new_filepath = os.path.join(dir_path, name + suffix + ext)
    if keep:
        shutil.copy(filepath, new_filepath)
    else:
        shutil.move(filepath, new_filepath)
    return new_filepath
219
220
def rename_on_folder(
        old:str,
        new:str,
        folder=None,
    ) -> None:
    """Batch rename the entries in the given `folder`.

    Replaces the `old` string by the `new` string in the name of every
    entry of the folder that contains it. Only the entry names are modified,
    never the path of the folder itself.
    If no folder is provided, the current working directory is used.
    Raises a FileNotFoundError if the folder does not exist.
    """
    target = '.' if folder is None else folder
    if not os.path.isdir(target):
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for name in os.listdir(target):
        if old in name:
            # Replace on the base name only, so a match in the folder path is left untouched
            os.rename(os.path.join(target, name), os.path.join(target, name.replace(old, new)))
    return None
252
253
def rename_on_folders(
        old:str,
        new:str,
        folder=None,
    ) -> None:
    """Rename the files inside the subfolders of the parent `folder`.

    In every direct subfolder, replaces the `old` string by the `new` string
    in the name of each entry that contains it.
    Entries placed directly in the parent `folder` are not renamed.
    If no `folder` is provided, the current working directory is used.
    Raises a FileNotFoundError if the folder does not exist.
    """
    parent = '.' if folder is None else folder
    if not os.path.isdir(parent):
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for entry in os.listdir(parent):
        # listdir returns bare names; join with the parent so they are not checked against the CWD
        subfolder = os.path.join(parent, entry)
        if not os.path.isdir(subfolder):
            continue
        for f in os.listdir(subfolder):
            if old in f:
                os.rename(os.path.join(subfolder, f), os.path.join(subfolder, f.replace(old, new)))
    return None
280
281
def copy_to_folders(
        folder=None,
        extension:str=None,
        strings_to_delete:list=None,
    ) -> None:
    """Copy the files from the parent `folder` with the given `extension` to individual subfolders.

    The files are selected with `get_list()`, using `extension` as the include filter;
    if no `extension` is provided, all the files in the folder are used.
    Each file is copied into a subfolder created next to it.
    The copy is named as the original file, removing the strings from the
    `strings_to_delete` list, and the subfolder is named as the copy without the `extension`
    (or without its last extension, if no `extension` was provided).
    If no `folder` is provided, it runs in the current working directory.
    Raises a ValueError if no matching files are found.
    """
    if folder is None:
        folder = os.getcwd()
    old_files = get_list(folder=folder, include=extension)
    if not old_files:
        wanted = extension + ' ' if extension else ''
        raise ValueError('No ' + wanted + 'files found in path!')
    for old_file in old_files:
        # Work on the base name only, so the parent path is never altered
        parent, new_name = os.path.split(old_file)
        for string in strings_to_delete or ():
            new_name = new_name.replace(string, '')
        if extension:
            subfolder_name = new_name.replace(extension, '')
        else:
            subfolder_name = os.path.splitext(new_name)[0]
        subfolder = os.path.join(parent, subfolder_name)
        os.makedirs(subfolder, exist_ok=True)
        shutil.copy(old_file, os.path.join(subfolder, new_name))
    return None
def save(object, filename: str = None):
def save(object, filename:str=None):
    """Pickle `object` and write it gzip-compressed into the current working directory.

    The file is named `filename`, or 'data' when none is given; the '.bin.gz'
    suffix is appended unless the name already ends with it.
    The destination path is printed once the file has been written.
    Serialization uses [pickle](https://docs.python.org/3/library/pickle.html).
    """
    suffix = '.bin.gz'
    target_name = filename if filename is not None else 'data'
    if not target_name.endswith(suffix):
        target_name = target_name + suffix
    file = os.path.join(os.getcwd(), target_name)
    with gzip.open(file, 'wb') as stream:
        pickle.dump(object, stream)
    print(f"Data saved and compressed to {file}")

Save a Python object in the current working directory as a compressed binary file, using pickle.

def load(filepath: str = 'data.bin.gz'):
def load(filepath:str='data.bin.gz'):
    """Read back a Python object stored as a gzip-compressed [pickle](https://docs.python.org/3/library/pickle.html) file.

    `filepath` is resolved with `get()`; if that finds nothing, the same
    name with '.bin.gz' appended is tried. A FileNotFoundError is raised
    when neither attempt resolves to a file.

    Use only if you trust the person who sent you the file!
    """
    # The second lookup only runs when the first one came back empty
    resolved = get(filepath, return_anyway=True) or get(filepath + '.bin.gz', return_anyway=True)
    if not resolved:
        raise FileNotFoundError(f"Missing file {filepath}")
    with gzip.open(resolved, 'rb') as stream:
        return pickle.load(stream)

Load a Python object from a compressed binary file, using pickle.

Use only if you trust the person who sent you the file!

def get(filepath, include=None, exclude=None, return_anyway: bool = False) -> str:
def get(
        filepath,
        include=None,
        exclude=None,
        return_anyway:bool=False,
        ) -> str:
    """Check if `filepath` exists, and return its full path.

    If `filepath` is a file, its absolute path is returned.
    If it is a directory, the files inside it are listed with `get_list()`,
    applying the `include` and `exclude` filters (string or list of strings),
    and the result is returned only if exactly one file remains.

    Raises a FileNotFoundError if nothing exists at `filepath` or no file
    is left in the directory, and a FileExistsError if more than one file
    remains; in that case, use stricter filters.
    With `return_anyway = True` it returns None instead of raising,
    which can be used to personalize errors.
    """
    if os.path.isfile(filepath):
        return os.path.abspath(filepath)
    if not os.path.isdir(filepath):
        if return_anyway:
            return None
        raise FileNotFoundError('Nothing found at ' + str(filepath))
    # A directory was given: it must narrow down to a single file
    files = get_list(folder=filepath, include=include, exclude=exclude, abspath=True)
    if len(files) == 1:
        return files[0]
    if return_anyway:
        return None
    if not files:
        # str() so that path-like objects do not break the message concatenation
        raise FileNotFoundError("The following directory is empty (maybe due to the 'include' filters):\n" + str(filepath))
    raise FileExistsError(f'More than one file found, please apply a more strict filter. Found:\n{files}')

Check if filepath exists, and returns its full path.

Raises an error if the file is not found, unless return_anyway = True, in which case it returns None. This can be used to personalize errors.

If the provided string is a directory, it checks the files inside it. If there is only one file inside, it returns said file; if there are more files, it tries to filter them with the include filters (string or list of strings) to return a single file. If this fails, try using stricter filters to return a single file.

def get_list( folder: str = None, include=None, exclude=None, abspath: bool = True, also_folders: bool = False, only_folders: bool = False) -> list:
 97def get_list(
 98        folder:str=None,
 99        include=None,
100        exclude=None,
101        abspath:bool=True,
102        also_folders:bool=False,
103        only_folders:bool=False,
104    ) -> list:
105    """Return the files inside a `folder`, applying optional filters.
106
107    Only filenames containing all strings in the `include` list will be returned.
108    Filenames containing any string from the `exclude` list will be ignored.
109
110    The full paths are returned by default; to get only the base names, set `abspath = False`.
111    The CWD folder is used by default if no `folder` is provided.
112
113    By default it only returns files, not folders.
114    It can optionally also/only returns folders,
115    with `also_folders` or `only_folders` set to `True`.
116    """
117    if not folder:
118        folder = os.getcwd()
119    if os.path.isfile(folder):
120        folder = os.path.dirname(folder)
121    if not os.path.isdir(folder):
122        raise FileNotFoundError('Directory not found: ' + folder)
123    folder = os.path.abspath(folder)
124    files = os.listdir(folder)
125    if not files:
126        return []
127    # Absolute paths?
128    if abspath:
129        files = [os.path.join(folder, f) for f in files]
130    # Should we keep only folders, also folders, or only files?
131    if only_folders:
132        files = [f for f in files if os.path.isdir(f)]
133    elif not also_folders:
134        files = [f for f in files if not os.path.isdir(f if abspath else os.path.join(folder, f))]
135    # Apply filters if provided
136    if include is not None:
137        # Ensure include filters is always a list
138        if not isinstance(include, list):
139            include = [str(include)]
140        # Normalize filter names
141        include = [os.path.basename(i) for i in include]
142        # Only keep files that contain all filters
143        files = [f for f in files if all(filter_str in os.path.basename(f) for filter_str in include)]
144    # Remove files containing any string from the exclude list
145    if exclude is not None:
146        # Ensure exclude filters is always a list
147        if not isinstance(exclude, list):
148            exclude = [str(exclude)]
149        # Normalize ignoring filter names
150        exclude = [os.path.basename(i) for i in exclude]
151        # Exclude the corresponding files
152        files = [f for f in files if not any(filter_str in os.path.basename(f) for filter_str in exclude)]
153    files.sort()
154    return files

Return the files inside a folder, applying optional filters.

Only filenames containing all strings in the include list will be returned. Filenames containing any string from the exclude list will be ignored.

The full paths are returned by default; to get only the base names, set abspath = False. The CWD folder is used by default if no folder is provided.

By default it only returns files, not folders. It can optionally also/only return folders, with also_folders or only_folders set to True.

def get_dir(folder=None) -> str:
def get_dir(folder=None) -> str:
    """Return the full path of `folder`, or of the parent folder if it's a file.

    If none is provided, the current working directory is returned.
    Raises a FileNotFoundError if `folder` is neither an existing
    folder nor an existing file.
    """
    if folder is None:
        return os.getcwd()
    if os.path.isdir(folder):
        return os.path.realpath(folder)
    # isfile must be called; the bare function object is always truthy
    if os.path.isfile(folder):
        return os.path.realpath(os.path.dirname(folder))
    raise FileNotFoundError(f'Missing folder at {folder}')

Returns the full path of folder or the parent folder if it's a file.

If none is provided, the current working directory is returned.

def remove(filepath: str) -> None:
def remove(filepath:str) -> None:
    """Delete the file, or the whole folder tree, found at `filepath`.

    Nothing happens if `filepath` is None or does not exist.

    > WARNING: Removing stuff is always dangerous, be careful!
    """
    if filepath is not None:
        if os.path.isfile(filepath):
            os.remove(filepath)
        elif os.path.isdir(filepath):
            shutil.rmtree(filepath)
        # Anything else did not exist in the first place
    return None

Removes the given file or folder at filepath.

WARNING: Removing stuff is always dangerous, be careful!

def backup( filepath: str, keep: bool = True, label: str = 'backup', timestamp: str = '%y%m%dT%H%M%S') -> str:
def backup(
        filepath:str,
        keep:bool=True,
        label:str='backup',
        timestamp:str='%y%m%dT%H%M%S',
        ) -> str:
    """Backup a file including the current timestamp in the name.

    `filepath` is resolved with `get()`, which raises if no single file is found.
    Keeps the original file by default (copy), unless `keep = False` (move).
    Appends the `label`, '_backup' by default, and the current time formatted
    with `timestamp`, between the file name and its last extension.
    Either of them can be disabled with an empty string or None.
    Returns the new backup filepath.
    """
    filepath = get(filepath)
    # Build the suffix from the enabled parts only, so a None label cannot break the concatenation
    suffix = '_' + label if label else ''
    if timestamp:
        suffix += '_' + datetime.now().strftime(timestamp)
    dir_path, basename = os.path.split(filepath)
    name, ext = os.path.splitext(basename)
    new_filepath = os.path.join(dir_path, name + suffix + ext)
    if keep:
        shutil.copy(filepath, new_filepath)
    else:
        shutil.move(filepath, new_filepath)
    return new_filepath

Backup a file including the current timestamp in the name.

Keeps the original file by default, unless keep = False. Appends a '_backup' label at the end of the filename. The timestamp can be optionally customised or disabled. Returns the new backup filepath.

def rename_on_folder(old: str, new: str, folder=None) -> None:
def rename_on_folder(
        old:str,
        new:str,
        folder=None,
    ) -> None:
    """Batch rename the entries in the given `folder`.

    Replaces the `old` string by the `new` string in the name of every
    entry of the folder that contains it. Only the entry names are modified,
    never the path of the folder itself.
    If no folder is provided, the current working directory is used.
    Raises a FileNotFoundError if the folder does not exist.
    """
    target = '.' if folder is None else folder
    if not os.path.isdir(target):
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for name in os.listdir(target):
        if old in name:
            # Replace on the base name only, so a match in the folder path is left untouched
            os.rename(os.path.join(target, name), os.path.join(target, name.replace(old, new)))
    return None

Batch renames files in the given folder.

Replaces the old string by new string. If no folder is provided, the current working directory is used.

def rename_on_folders(old: str, new: str, folder=None) -> None:
def rename_on_folders(
        old:str,
        new:str,
        folder=None,
    ) -> None:
    """Rename the files inside the subfolders of the parent `folder`.

    In every direct subfolder, replaces the `old` string by the `new` string
    in the name of each entry that contains it.
    Entries placed directly in the parent `folder` are not renamed.
    If no `folder` is provided, the current working directory is used.
    Raises a FileNotFoundError if the folder does not exist.
    """
    parent = '.' if folder is None else folder
    if not os.path.isdir(parent):
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for entry in os.listdir(parent):
        # listdir returns bare names; join with the parent so they are not checked against the CWD
        subfolder = os.path.join(parent, entry)
        if not os.path.isdir(subfolder):
            continue
        for f in os.listdir(subfolder):
            if old in f:
                os.rename(os.path.join(subfolder, f), os.path.join(subfolder, f.replace(old, new)))
    return None

Renames the files inside the subfolders in the parent folder.

Renames from an old string to the new string. If no folder is provided, the current working directory is used.

def copy_to_folders(folder=None, extension: str = None, strings_to_delete: list = []) -> None:
def copy_to_folders(
        folder=None,
        extension:str=None,
        strings_to_delete:list=None,
    ) -> None:
    """Copy the files from the parent `folder` with the given `extension` to individual subfolders.

    The files are selected with `get_list()`, using `extension` as the include filter;
    if no `extension` is provided, all the files in the folder are used.
    Each file is copied into a subfolder created next to it.
    The copy is named as the original file, removing the strings from the
    `strings_to_delete` list, and the subfolder is named as the copy without the `extension`
    (or without its last extension, if no `extension` was provided).
    If no `folder` is provided, it runs in the current working directory.
    Raises a ValueError if no matching files are found.
    """
    if folder is None:
        folder = os.getcwd()
    old_files = get_list(folder=folder, include=extension)
    if not old_files:
        wanted = extension + ' ' if extension else ''
        raise ValueError('No ' + wanted + 'files found in path!')
    for old_file in old_files:
        # Work on the base name only, so the parent path is never altered
        parent, new_name = os.path.split(old_file)
        for string in strings_to_delete or ():
            new_name = new_name.replace(string, '')
        if extension:
            subfolder_name = new_name.replace(extension, '')
        else:
            subfolder_name = os.path.splitext(new_name)[0]
        subfolder = os.path.join(parent, subfolder_name)
        os.makedirs(subfolder, exist_ok=True)
        shutil.copy(old_file, os.path.join(subfolder, new_name))
    return None

Copies the files from the parent folder with the given extension to individual subfolders.

The subfolders are named as the original files, removing the strings from the strings_to_delete list. If no folder is provided, it runs in the current working directory.