aton.file
Description
Functions to move files around.
Index
save() |
Save a Python object to a compressed binary file, as .bin.gz |
load() |
Load a Python object from a compressed binary file, as .bin.gz |
get() |
Check that a file exists, and return the full path |
get_list() |
Get a list of the files inside a folder, applying optional filters |
get_dir() |
Get the full path of a folder or the cwd |
remove() |
Remove file or folder |
backup() |
Backup a file including the current timestamp in the name |
rename_on_folder() |
Batch rename files from a folder |
rename_on_folders() |
Batch rename files from subfolders |
copy_to_folders() |
Copy files to individual subfolders |
"""
# Description

Functions to move files around.


# Index

| | |
| --- | --- |
| `save()`              | Save a Python object to a compressed binary file, as `.bin.gz` |
| `load()`              | Load a Python object from a compressed binary file, as `.bin.gz` |
| `get()`               | Check that a file exists, and return the full path |
| `get_list()`          | Get a list of the files inside a folder, applying optional filters |
| `get_dir()`           | Get the full path of a folder or the cwd |
| `remove()`            | Remove file or folder |
| `backup()`            | Backup a file including the current timestamp in the name |
| `rename_on_folder()`  | Batch rename files from a folder |
| `rename_on_folders()` | Batch rename files from subfolders |
| `copy_to_folders()`   | Copy files to individual subfolders |

---
"""


import os
import shutil
import pickle
import gzip
from datetime import datetime


def save(object, filename:str=None):
    """Save a Python object in the current working directory as a compressed binary file, using [pickle](https://docs.python.org/3/library/pickle.html).

    The file is called `filename` (`data` if not provided);
    the `.bin.gz` extension is appended if missing.
    The final path is printed after writing.
    """
    filename = 'data' if filename is None else filename
    if not filename.endswith('.bin.gz'):
        filename += '.bin.gz'
    file = os.path.join(os.getcwd(), filename)
    with gzip.open(file, 'wb') as f:
        pickle.dump(object, f)
    print(f"Data saved and compressed to {file}")


def load(filepath:str='data.bin.gz'):
    """Load a Python object from a compressed binary file, using [pickle](https://docs.python.org/3/library/pickle.html).

    If nothing is found at `filepath`, `filepath + '.bin.gz'` is tried
    before raising a `FileNotFoundError`.

    Use only if you trust the person who sent you the file!
    """
    # The second lookup only runs if the first one found nothing
    file_path = get(filepath, return_anyway=True) or get(filepath + '.bin.gz', return_anyway=True)
    if not file_path:
        raise FileNotFoundError(f"Missing file {filepath}")
    # SECURITY: unpickling can execute arbitrary code, only load trusted files
    with gzip.open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data


def get(
        filepath,
        include=None,
        exclude=None,
        return_anyway:bool=False,
        ) -> str:
    """Check if `filepath` exists, and return its full path.

    Raises a `FileNotFoundError` if nothing is found,
    unless `return_anyway = True`, in which case it returns None.
    This can be used to personalize errors.

    If the provided string is a directory, it checks the files inside it.
    If there is only one file left after applying the `include` and `exclude`
    filters (string or list of strings), it returns said file.
    If no file is left it raises a `FileNotFoundError`, and if several
    are left it raises a `FileExistsError`; try stricter filters in that case.
    """
    if os.path.isfile(filepath):
        return os.path.abspath(filepath)
    if not os.path.isdir(filepath):
        if return_anyway:
            return None
        raise FileNotFoundError('Nothing found at ' + str(filepath))
    # It is a directory: try to narrow it down to a single file
    files = get_list(folder=filepath, include=include, exclude=exclude, abspath=True)
    if len(files) == 1:
        return files[0]
    if return_anyway:
        return None
    if not files:
        # str() so that path-like inputs do not break the error message
        raise FileNotFoundError("The following directory is empty (maybe due to the 'include' filters):\n" + str(filepath))
    raise FileExistsError(f'More than one file found, please apply a more strict filter. Found:\n{files}')


def get_list(
        folder:str=None,
        include=None,
        exclude=None,
        abspath:bool=True,
        also_folders:bool=False,
        only_folders:bool=False,
        ) -> list:
    """Return the files inside a `folder`, applying optional filters.

    Only filenames containing all strings in the `include` list will be returned.
    Filenames containing any string from the `exclude` list will be ignored.
    Both filters can be a single string or a list, tuple or set of strings.

    The full paths are returned by default; to get only the base names, set `abspath = False`.
    The CWD folder is used by default if no `folder` is provided.
    If `folder` is a file, its parent folder is used.

    By default it only returns files, not folders.
    It can optionally also/only return folders,
    with `also_folders` or `only_folders` set to `True`.

    The returned list is sorted.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if not folder:
        folder = os.getcwd()
    if os.path.isfile(folder):
        # abspath first, otherwise a bare file name would give an empty dirname
        folder = os.path.dirname(os.path.abspath(folder))
    if not os.path.isdir(folder):
        raise FileNotFoundError('Directory not found: ' + str(folder))
    folder = os.path.abspath(folder)
    names = os.listdir(folder)
    # Should we keep only folders, also folders, or only files?
    # The check always uses the full path, so it does not depend on the CWD
    if only_folders:
        names = [n for n in names if os.path.isdir(os.path.join(folder, n))]
    elif not also_folders:
        names = [n for n in names if not os.path.isdir(os.path.join(folder, n))]
    # Only keep names that contain all the include filters
    if include is not None:
        if not isinstance(include, (list, tuple, set)):
            include = [str(include)]
        include = [os.path.basename(i) for i in include]
        names = [n for n in names if all(filter_str in n for filter_str in include)]
    # Remove names containing any of the exclude filters
    if exclude is not None:
        if not isinstance(exclude, (list, tuple, set)):
            exclude = [str(exclude)]
        exclude = [os.path.basename(i) for i in exclude]
        names = [n for n in names if not any(filter_str in n for filter_str in exclude)]
    names.sort()
    if abspath:
        return [os.path.join(folder, n) for n in names]
    return names


def get_dir(folder=None) -> str:
    """Returns the full path of `folder` or the parent folder if it's a file.

    If none is provided, the current working directory is returned.
    Raises a `FileNotFoundError` if `folder` is neither an existing folder nor an existing file.
    """
    if folder is None:
        return os.getcwd()
    if os.path.isdir(folder):
        return os.path.realpath(folder)
    if os.path.isfile(folder):
        return os.path.realpath(os.path.dirname(folder))
    raise FileNotFoundError(f'Missing folder at {folder}')


def remove(filepath:str) -> None:
    """Removes the given file or folder at `filepath`.

    Folders are removed with all their content.
    Nothing happens if `filepath` is None or does not exist.

    > WARNING: Removing stuff is always dangerous, be careful!
    """
    if filepath is None:
        return None  # It did not exist in the first place
    if os.path.isfile(filepath):
        os.remove(filepath)
    elif os.path.isdir(filepath):
        shutil.rmtree(filepath)
    return None


def backup(
        filepath:str,
        keep:bool=True,
        label:str='backup',
        timestamp:str='%y%m%dT%H%M%S',
        ) -> str:
    """Backup a file including the current timestamp in the name.

    Keeps the original file by default, unless `keep = False`.
    Appends a '_backup' `label` at the end of the filename.
    The `label` and the `timestamp` format can be customised,
    or disabled with an empty string or None.
    Returns the new backup filepath.
    """
    filepath = get(filepath)
    # Falsy values (None, '') disable the corresponding part of the suffix
    suffix = '_' + label if label else ''
    if timestamp:
        suffix += '_' + datetime.now().strftime(timestamp)
    dir_path, basename = os.path.split(filepath)
    name, ext = os.path.splitext(basename)
    new_filepath = os.path.join(dir_path, name + suffix + ext)
    if keep:
        shutil.copy(filepath, new_filepath)
    else:
        shutil.move(filepath, new_filepath)
    return new_filepath


def rename_on_folder(
        old:str,
        new:str,
        folder=None,
        ) -> None:
    """Batch renames files in the given `folder`.

    Replaces the `old` string with the `new` string in the names
    of the entries directly inside `folder`.
    If no folder is provided, the current working directory is used.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if folder is None:
        folder_path = os.getcwd()
    elif os.path.isdir(folder):
        folder_path = folder
    else:
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for name in os.listdir(folder_path):
        # Match and replace on the base name only, never on the folder path
        if old in name:
            os.rename(os.path.join(folder_path, name),
                      os.path.join(folder_path, name.replace(old, new)))
    return None


def rename_on_folders(
        old:str,
        new:str,
        folder=None,
        ) -> None:
    """Renames the files inside the subfolders in the parent `folder`.

    Renames from an `old` string to the `new` string.
    If no `folder` is provided, the current working directory is used.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if folder is None:
        parent = os.getcwd()
    elif os.path.isdir(folder):
        parent = folder
    else:
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for entry in os.listdir(parent):
        # listdir returns base names: join them so that the check does not depend on the CWD
        subfolder = os.path.join(parent, entry)
        if not os.path.isdir(subfolder):
            continue
        for name in os.listdir(subfolder):
            if old in name:
                os.rename(os.path.join(subfolder, name),
                          os.path.join(subfolder, name.replace(old, new)))
    return None


def copy_to_folders(
        folder=None,
        extension:str=None,
        strings_to_delete:list=[],
        ) -> None:
    """Copies the files from the parent `folder` with the given `extension` to individual subfolders.

    The subfolders are named as the original files without the extension,
    removing the strings from the `strings_to_delete` list.
    The copied files are also renamed without said strings.
    If no `extension` is provided, all files are copied.
    If no `folder` is provided, it runs in the current working directory.
    Raises a `ValueError` if no matching files are found.
    """
    # NOTE: the list default is shared between calls, but it is only read, never modified
    if folder is None:
        folder = os.getcwd()
    old_files = get_list(folder=folder, include=extension)
    if extension:
        # The include filter matches anywhere in the name, keep real extensions only
        old_files = [f for f in old_files if f.endswith(extension)]
    if not old_files:
        wanted = extension + ' ' if extension else ''
        raise ValueError('No ' + wanted + 'files found in path!')
    for old_file in old_files:
        # Work on the base name, so that the folder path is never modified
        parent, new_name = os.path.split(old_file)
        for string in strings_to_delete:
            new_name = new_name.replace(string, '')
        if extension and new_name.endswith(extension):
            subfolder = new_name[:-len(extension)]
        elif extension:
            subfolder = new_name  # The extension was removed by strings_to_delete
        else:
            subfolder = os.path.splitext(new_name)[0]
        if not new_name or not subfolder:
            raise ValueError(f'Nothing left to name the subfolder of {old_file}')
        path = os.path.join(parent, subfolder)
        os.makedirs(path, exist_ok=True)
        shutil.copy(old_file, os.path.join(path, new_name))
    return None
def save(object, filename:str=None):
    """Pickle `object` into a gzip-compressed file inside the current working directory.

    The file is called `filename`, or `data` if none is given;
    the `.bin.gz` extension is appended when the name does not already end with it.
    The final path is printed once the data is written.
    Serialization is done with [pickle](https://docs.python.org/3/library/pickle.html).
    """
    extension = '.bin.gz'
    name = filename if filename is not None else 'data'
    if not name.endswith(extension):
        name = name + extension
    file = os.path.join(os.getcwd(), name)
    with gzip.open(file, 'wb') as stream:
        pickle.dump(object, stream)
    print(f"Data saved and compressed to {file}")
Save a Python object in the current working directory as a compressed binary file, using pickle.
def load(filepath:str='data.bin.gz'):
    """Read back a Python object from a gzip-compressed [pickle](https://docs.python.org/3/library/pickle.html) file.

    The file is searched at `filepath` first, and at `filepath + '.bin.gz'`
    if that finds nothing. A `FileNotFoundError` is raised if both fail.

    Use only if you trust the person who sent you the file!
    """
    # `or` keeps the second lookup lazy: it only runs when the first one finds nothing
    located = get(filepath, return_anyway=True) or get(filepath + '.bin.gz', return_anyway=True)
    if not located:
        raise FileNotFoundError(f"Missing file {filepath}")
    with gzip.open(located, 'rb') as stream:
        return pickle.load(stream)
Load a Python object from a compressed binary file, using pickle.
Use only if you trust the person who sent you the file!
def get(
        filepath,
        include=None,
        exclude=None,
        return_anyway:bool=False,
        ) -> str:
    """Check if `filepath` exists, and return its full path.

    Raises a `FileNotFoundError` if nothing is found,
    unless `return_anyway = True`, in which case it returns None.
    This can be used to personalize errors.

    If the provided string is a directory, it checks the files inside it.
    If there is only one file left after applying the `include` and `exclude`
    filters (string or list of strings), it returns said file.
    If no file is left it raises a `FileNotFoundError`, and if several
    are left it raises a `FileExistsError`; try stricter filters in that case.
    """
    if os.path.isfile(filepath):
        return os.path.abspath(filepath)
    if not os.path.isdir(filepath):
        if return_anyway:
            return None
        raise FileNotFoundError('Nothing found at ' + str(filepath))
    # It is a directory: try to narrow it down to a single file
    files = get_list(folder=filepath, include=include, exclude=exclude, abspath=True)
    if len(files) == 1:
        return files[0]
    if return_anyway:
        return None
    if not files:
        # str() so that path-like inputs do not break the error message
        raise FileNotFoundError("The following directory is empty (maybe due to the 'include' filters):\n" + str(filepath))
    raise FileExistsError(f'More than one file found, please apply a more strict filter. Found:\n{files}')
Check if filepath exists, and return its full path.
Raises an error if the file is not found,
unless return_anyway = True, in which case it returns None.
This can be used to personalize errors.
If the provided string is a directory, it checks the files inside it.
If there is only one file inside, it returns said file;
if there are more files, it tries to filter them with the include filters
(string or list of strings) to return a single file.
If this fails, try using more strict filters to return a single file.
def get_list(
        folder:str=None,
        include=None,
        exclude=None,
        abspath:bool=True,
        also_folders:bool=False,
        only_folders:bool=False,
        ) -> list:
    """Return the files inside a `folder`, applying optional filters.

    Only filenames containing all strings in the `include` list will be returned.
    Filenames containing any string from the `exclude` list will be ignored.
    Both filters can be a single string or a list, tuple or set of strings.

    The full paths are returned by default; to get only the base names, set `abspath = False`.
    The CWD folder is used by default if no `folder` is provided.
    If `folder` is a file, its parent folder is used.

    By default it only returns files, not folders.
    It can optionally also/only return folders,
    with `also_folders` or `only_folders` set to `True`.

    The returned list is sorted.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if not folder:
        folder = os.getcwd()
    if os.path.isfile(folder):
        # abspath first, otherwise a bare file name would give an empty dirname
        folder = os.path.dirname(os.path.abspath(folder))
    if not os.path.isdir(folder):
        raise FileNotFoundError('Directory not found: ' + str(folder))
    folder = os.path.abspath(folder)
    names = os.listdir(folder)
    # Should we keep only folders, also folders, or only files?
    # The check always uses the full path, so it does not depend on the CWD
    if only_folders:
        names = [n for n in names if os.path.isdir(os.path.join(folder, n))]
    elif not also_folders:
        names = [n for n in names if not os.path.isdir(os.path.join(folder, n))]
    # Only keep names that contain all the include filters
    if include is not None:
        if not isinstance(include, (list, tuple, set)):
            include = [str(include)]
        include = [os.path.basename(i) for i in include]
        names = [n for n in names if all(filter_str in n for filter_str in include)]
    # Remove names containing any of the exclude filters
    if exclude is not None:
        if not isinstance(exclude, (list, tuple, set)):
            exclude = [str(exclude)]
        exclude = [os.path.basename(i) for i in exclude]
        names = [n for n in names if not any(filter_str in n for filter_str in exclude)]
    names.sort()
    if abspath:
        return [os.path.join(folder, n) for n in names]
    return names
Return the files inside a folder, applying optional filters.
Only filenames containing all strings in the include list will be returned.
Filenames containing any string from the exclude list will be ignored.
The full paths are returned by default; to get only the base names, set abspath = False.
The CWD folder is used by default if no folder is provided.
By default it only returns files, not folders.
It can optionally also/only return folders,
with also_folders or only_folders set to True.
def get_dir(folder=None) -> str:
    """Returns the full path of `folder` or the parent folder if it's a file.

    If none is provided, the current working directory is returned.
    Raises a `FileNotFoundError` if `folder` is neither an existing folder nor an existing file.
    """
    if folder is None:
        return os.getcwd()
    if os.path.isdir(folder):
        return os.path.realpath(folder)
    # isfile must be called: the bare function object is always truthy,
    # which made the error below unreachable
    if os.path.isfile(folder):
        return os.path.realpath(os.path.dirname(folder))
    raise FileNotFoundError(f'Missing folder at {folder}')
Returns the full path of folder or the parent folder if it's a file.
If none is provided, the current working directory is returned.
def remove(filepath:str) -> None:
    """Deletes whatever is at `filepath`, be it a file or a folder.

    Folders are deleted together with their content.
    If `filepath` is None or nothing exists there, nothing is done.

    > WARNING: Removing stuff is always dangerous, be careful!
    """
    if filepath is None:
        return None  # Nothing to remove
    if os.path.isfile(filepath):
        os.remove(filepath)
        return None
    if os.path.isdir(filepath):
        shutil.rmtree(filepath)
    # Reaching this point without a match means it did not exist in the first place
    return None
Removes the given file or folder at filepath.
WARNING: Removing stuff is always dangerous, be careful!
def backup(
        filepath:str,
        keep:bool=True,
        label:str='backup',
        timestamp:str='%y%m%dT%H%M%S',
        ) -> str:
    """Backup a file including the current timestamp in the name.

    Keeps the original file by default, unless `keep = False`.
    Appends a '_backup' `label` at the end of the filename.
    The `label` and the `timestamp` format can be customised,
    or disabled with an empty string or None.
    Returns the new backup filepath.
    """
    filepath = get(filepath)
    # Falsy values (None, '') disable the corresponding part of the suffix;
    # previously label=None raised a TypeError when building the name
    suffix = '_' + label if label else ''
    if timestamp:
        suffix += '_' + datetime.now().strftime(timestamp)
    dir_path, basename = os.path.split(filepath)
    name, ext = os.path.splitext(basename)
    new_filepath = os.path.join(dir_path, name + suffix + ext)
    if keep:
        shutil.copy(filepath, new_filepath)
    else:
        shutil.move(filepath, new_filepath)
    return new_filepath
Backup a file including the current timestamp in the name.
Keeps the original file by default, unless keep = False.
Appends a '_backup' label at the end of the filename.
The timestamp can be optionally customised or disabled.
Returns the new backup filepath.
def rename_on_folder(
        old:str,
        new:str,
        folder=None,
        ) -> None:
    """Batch renames files in the given `folder`.

    Replaces the `old` string with the `new` string in the names
    of the entries directly inside `folder`.
    If no folder is provided, the current working directory is used.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if folder is None:
        folder_path = os.getcwd()
    elif os.path.isdir(folder):
        # Relative folders are already resolved against the CWD by isdir
        folder_path = folder
    else:
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for name in os.listdir(folder_path):
        # Match and replace on the base name only: doing it on the joined path
        # would also rewrite the folder name if it contains `old`
        if old in name:
            os.rename(os.path.join(folder_path, name),
                      os.path.join(folder_path, name.replace(old, new)))
    return None
Batch renames files in the given folder.
Replaces the old string with the new string.
If no folder is provided, the current working directory is used.
def rename_on_folders(
        old:str,
        new:str,
        folder=None,
        ) -> None:
    """Renames the files inside the subfolders in the parent `folder`.

    Renames from an `old` string to the `new` string.
    If no `folder` is provided, the current working directory is used.
    Raises a `FileNotFoundError` if the folder does not exist.
    """
    if folder is None:
        parent = os.getcwd()
    elif os.path.isdir(folder):
        # Relative folders are already resolved against the CWD by isdir
        parent = folder
    else:
        raise FileNotFoundError('Missing folder at ' + str(folder) + ' or in the CWD ' + os.getcwd())
    for entry in os.listdir(parent):
        # listdir returns base names: join them with the parent,
        # otherwise they would be looked up in the CWD instead
        subfolder = os.path.join(parent, entry)
        if not os.path.isdir(subfolder):
            continue
        for name in os.listdir(subfolder):
            if old in name:
                os.rename(os.path.join(subfolder, name),
                          os.path.join(subfolder, name.replace(old, new)))
    return None
Renames the files inside the subfolders in the parent folder.
Renames from an old string to the new string.
If no folder is provided, the current working directory is used.
def copy_to_folders(
        folder=None,
        extension:str=None,
        strings_to_delete:list=[],
        ) -> None:
    """Copies the files from the parent `folder` with the given `extension` to individual subfolders.

    The subfolders are named as the original files without the extension,
    removing the strings from the `strings_to_delete` list.
    The copied files are also renamed without said strings.
    If no `extension` is provided, all files are copied.
    If no `folder` is provided, it runs in the current working directory.
    Raises a `ValueError` if no matching files are found.
    """
    # NOTE: the list default is shared between calls, but it is only read, never modified
    if folder is None:
        folder = os.getcwd()
    old_files = get_list(folder=folder, include=extension)
    if extension:
        # The include filter matches anywhere in the name, keep real extensions only
        old_files = [f for f in old_files if f.endswith(extension)]
    # get_list returns a list, never None, so emptiness is what must be checked
    if not old_files:
        wanted = extension + ' ' if extension else ''
        raise ValueError('No ' + wanted + 'files found in path!')
    for old_file in old_files:
        # Work on the base name: joining a folder with an absolute path discards
        # the folder, and replacing on the full path could alter parent folders
        parent, new_name = os.path.split(old_file)
        for string in strings_to_delete:
            new_name = new_name.replace(string, '')
        if extension and new_name.endswith(extension):
            subfolder = new_name[:-len(extension)]
        elif extension:
            subfolder = new_name  # The extension was removed by strings_to_delete
        else:
            subfolder = os.path.splitext(new_name)[0]
        if not new_name or not subfolder:
            raise ValueError(f'Nothing left to name the subfolder of {old_file}')
        path = os.path.join(parent, subfolder)
        os.makedirs(path, exist_ok=True)
        shutil.copy(old_file, os.path.join(path, new_name))
    return None
Copies the files from the parent folder with the given extension to individual subfolders.
The subfolders are named as the original files,
removing the strings from the strings_to_delete list.
If no folder is provided, it runs in the current working directory.