# =============================================================================
# apacheWFSSelectiveDownload (Apache Web File Server Selective Download)
#
# Simple, date-selective download script for apache web file server.
#
# Copyright (c) 2025 VELENDEU, eetnaviation
#
# https://velend.eu/
# https://git.velend.eu/eetnaviation/ApacheWFSSelectiveDownload
#
# All rights reserved unless otherwise stated.
#
# Permission is hereby denied to copy, modify, distribute, sublicense,
# or sell copies of this software without explicit prior written consent.
#
# All dependency trademarks and names if included are subject to copyright
# of their respective owners.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# =============================================================================

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime, timedelta
import os

##################### BEGIN USER CONFIGURABLE PART #####################

# Set your apache Web File Server link here.
BASE_URL = ''

# Downloaded files directory name. Default is downloaded_files.
DOWNLOAD_DIR = './downloaded_files'

# Maximum file last modified date. If days = 30 then files with an older
# last modified than 30 days will NOT be downloaded.
# NOTE(review): naive local time — consistent with the naive timestamps
# parsed from the Apache index page below.
CUTOFF_DATE = datetime.now() - timedelta(days=30)
###################### END USER CONFIGURABLE PART ######################

os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Fetch the Apache auto-index page. A timeout prevents hanging forever on a
# dead server; raise_for_status stops us from "parsing" an HTTP error page.
response = requests.get(BASE_URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Apache fancy-index table rows: cols[1] holds the file link, cols[2] the
# last-modified timestamp (e.g. "17-Mar-2025 14:02").
for row in soup.find_all('tr'):
    cols = row.find_all('td')
    if len(cols) < 3:
        continue

    link_tag = cols[1].find('a')
    if not link_tag:
        continue

    filename = link_tag.get('href')
    # Skip missing hrefs, subdirectories, and Apache's column-sort links.
    if not filename or filename.endswith('/') or filename.startswith('?'):
        continue

    date_str = cols[2].text.strip()
    try:
        file_date = datetime.strptime(date_str, '%d-%b-%Y %H:%M')
    except ValueError:
        # Row without a parseable date (e.g. "Parent Directory") — skip.
        continue

    if file_date >= CUTOFF_DATE:
        file_url = urljoin(BASE_URL, filename)
        # basename() keeps a hostile/odd href from escaping DOWNLOAD_DIR.
        local_path = os.path.join(DOWNLOAD_DIR, os.path.basename(filename))
        # Stream in 8 KiB chunks; the context managers close the HTTP
        # connection and the file even if a write fails mid-download.
        with requests.get(file_url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(local_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Downloaded: {filename}")