import asyncio import logging import math import secrets from pathlib import Path import aiohttp _logger = logging.getLogger("worthless.helper") async def download_file( file_url: str, file_name: str, file_path: Path | str, file_len: int = None, overwrite: bool = False, chunks: int = None, threads_num: int = None, ) -> Path: """ Download file name to file_path. You should implement your own download method instead of using this. Args: file_url: Url to download the file from file_name: File name to download into file_path: Path to download file into file_len: File length to support threaded downloading overwrite: Whether overwrite existing file or not chunks: Chunks to write file into memory before writing to disk threads_num: Number of download threads Return: Downloaded file as a Path object """ logger = _logger.getChild("download_file") if not chunks: chunks = 8192 if not threads_num: threads_num = 8 logger.debug("Downloading chunks {} with {} thread".format(chunks, threads_num)) file_path = Path(file_path).joinpath(file_name) async def _download( session: aiohttp.ClientSession, from_bytes: int, to_bytes: int, threaded: bool = None, ) -> Path: headers = {"Range": f"bytes={from_bytes}-{to_bytes if to_bytes else ''}"} if threaded: p = file_path.parent.joinpath(secrets.token_urlsafe(16)) else: p = file_path p.touch(exist_ok=True) rsp = await session.get(file_url, headers=headers, timeout=None) if rsp.status == 416: # Not an error, so yeah. return p rsp.raise_for_status() with p.open("ab") as file: async for chunk in rsp.content.iter_chunked(chunks): await asyncio.to_thread(file.write, chunk) return p if overwrite: file_path.unlink(missing_ok=True) if file_path.exists(): cur_len = file_path.stat().st_size else: file_path.touch() cur_len = 0 if not file_len or threads_num == 1: async with aiohttp.ClientSession() as s: return await _download(session=s, from_bytes=cur_len, to_bytes=file_len) download_bytes = file_len - cur_len # if bytes * threads is smaller than file_len then we will not get the full file. download_bytes_t = math.ceil(download_bytes / threads_num) download_jobs = [] current_bytes = cur_len async with aiohttp.ClientSession() as s: for thread in range(threads_num): next_bytes = current_bytes + download_bytes_t if next_bytes > file_len: next_bytes = file_len download_jobs.append( _download( session=s, from_bytes=current_bytes, to_bytes=next_bytes, threaded=True, ) ) # Move to next bytes current_bytes = next_bytes all_bytes = await asyncio.gather(*download_jobs) # Merge bytes into the file with file_path.open("ab") as f: for bytes_path in all_bytes: f.write(bytes_path.read_bytes()) bytes_path.unlink()