# Files
# ApacheWFSSelectiveDownload/main.py
# 2025-06-27 15:15:17 +03:00
#
# 70 lines
# 2.7 KiB
# Python
# =============================================================================
# apacheWFSSelectiveDownload (Apache Web File Server Selective Download)
#
# Simple, date-selective download script for apache web file server.
#
# Copyright (c) 2025 VELENDEU, eetnaviation
#
# https://velend.eu/
# https://git.velend.eu/eetnaviation/ApacheWFSSelectiveDownload
#
# All rights reserved unless otherwise stated.
#
# Permission is hereby denied to copy, modify, distribute, sublicense,
# or sell copies of this software without explicit prior written consent.
#
# All dependency trademarks and names if included are subject to copyright
# of their respective owners.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# =============================================================================
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime, timedelta
import os
##################### BEGIN USER CONFIGURABLE PART #####################
BASE_URL = '' # Root URL of the Apache web file server directory listing (must end in '/'). REQUIRED — leave empty and the script has nothing to fetch.
DOWNLOAD_DIR = './downloaded_files' # Local directory for downloaded files; created automatically if missing. Default: downloaded_files
CUTOFF_DATE = datetime.now() - timedelta(days=30) # Oldest acceptable "Last modified" date. With days=30, files last modified more than 30 days ago are NOT downloaded.
###################### END USER CONFIGURABLE PART ######################
# ---------------------------------------------------------------------------
# Fetch the Apache autoindex page and download every regular file whose
# "Last modified" column is on or after CUTOFF_DATE.
# ---------------------------------------------------------------------------
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Fetch the directory listing. timeout prevents an indefinite hang;
# raise_for_status() fails loudly instead of parsing an HTTP error page.
response = requests.get(BASE_URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

for row in soup.find_all('tr'):
    # Apache fancy-index table layout: col 0 = icon, col 1 = name link,
    # col 2 = last-modified date. Header/divider rows have fewer cells.
    cols = row.find_all('td')
    if len(cols) < 3:
        continue
    link_tag = cols[1].find('a')
    if not link_tag:
        continue
    filename = link_tag.get('href')
    # Skip subdirectories (trailing '/') and column-sort links ('?C=N;O=A').
    if not filename or filename.endswith('/') or filename.startswith('?'):
        continue
    date_str = cols[2].text.strip()
    try:
        # Apache's default fancy-index date format, e.g. "27-Jun-2025 15:15".
        file_date = datetime.strptime(date_str, '%d-%b-%Y %H:%M')
    except ValueError:
        continue  # rows like "Parent Directory" carry no parseable date
    if file_date < CUTOFF_DATE:
        continue  # older than the cutoff — intentionally not downloaded
    file_url = urljoin(BASE_URL, filename)
    # basename() guards against path components in the href escaping
    # DOWNLOAD_DIR (path traversal).
    local_path = os.path.join(DOWNLOAD_DIR, os.path.basename(filename))
    # Stream in chunks so large files never sit fully in memory; the
    # context managers guarantee both the connection and file handle close.
    with requests.get(file_url, stream=True, timeout=30) as r:
        r.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"Downloaded: {filename}")  # fixed: f-string previously had no placeholder