commit 780ff1d58613da1f7f32237f73b82b55ec28d286 Author: eetnaviation Date: Fri Jun 27 15:15:17 2025 +0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d623faf --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.venv/ +downloaded_files/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/apacheWFSSelectiveDownload.iml b/.idea/apacheWFSSelectiveDownload.iml new file mode 100644 index 0000000..2c80e12 --- /dev/null +++ b/.idea/apacheWFSSelectiveDownload.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..7d1b485 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..4d202f5 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..8d65682 --- /dev/null +++ b/main.py @@ -0,0 +1,69 @@ +# ============================================================================= +# apacheWFSSelectiveDownload (Apache Web File Server Selective Download) +# +# Simple, date-selective download script for apache web file server. 
#
# Copyright (c) 2025 VELENDEU, eetnaviation
#
# https://velend.eu/
# https://git.velend.eu/eetnaviation/ApacheWFSSelectiveDownload
#
# All rights reserved unless otherwise stated.
#
# Permission is hereby denied to copy, modify, distribute, sublicense,
# or sell copies of this software without explicit prior written consent.
#
# All dependency trademarks and names if included are subject to copyright
# of their respective owners.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# =============================================================================

import os
from datetime import datetime, timedelta
from urllib.parse import unquote, urljoin

##################### BEGIN USER CONFIGURABLE PART #####################

BASE_URL = ''  # Set your apache Web File Server link here
DOWNLOAD_DIR = './downloaded_files'  # Downloaded files directory name. Default is downloaded_files
CUTOFF_DATE = datetime.now() - timedelta(days=30)  # Maximum file last modified date. If days = 30 then files with an older last modified than 30 days will NOT be downloaded.

###################### END USER CONFIGURABLE PART ######################

# "Last modified" format used by Apache autoindex listings,
# e.g. "27-Jun-2025 15:15".
LISTING_DATE_FORMAT = '%d-%b-%Y %H:%M'

# Per-request network timeout in seconds; without one, a stalled server
# would hang the script forever.
REQUEST_TIMEOUT = 60


def parse_listing_date(text):
    """Parse one "Last modified" table cell into a datetime.

    Returns None for cells that do not hold a date in the listing format
    (header rows, the "Parent Directory" row, a bare "-" cell, ...), which
    callers treat as "skip this row".
    """
    try:
        return datetime.strptime(text.strip(), LISTING_DATE_FORMAT)
    except ValueError:
        return None


def iter_recent_files(listing_html, cutoff):
    """Yield the href of every file row whose modified date is >= cutoff.

    Rows that are not downloadable file entries are skipped: fewer than
    three columns, no anchor tag, an href ending in '/' (a directory),
    or an unparseable date cell.
    """
    from bs4 import BeautifulSoup  # local import: lets the module load without bs4

    soup = BeautifulSoup(listing_html, 'html.parser')
    for row in soup.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) < 3:
            continue  # header/separator row
        link_tag = cols[1].find('a')
        if not link_tag:
            continue
        href = link_tag.get('href')
        if not href or href.endswith('/'):
            continue  # no link target, or a subdirectory
        modified = parse_listing_date(cols[2].text)
        if modified is not None and modified >= cutoff:
            yield href


def download_file(file_url, local_path):
    """Stream file_url to local_path.

    Raises requests.HTTPError on a non-2xx status so an error page is
    never saved to disk as if it were the file. The response is closed
    via the context manager even if writing fails.
    """
    import requests  # local import: lets the module load without requests

    with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        r.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


def main():
    """Download every file newer than CUTOFF_DATE from the BASE_URL listing."""
    import requests  # local import: lets the module load without requests

    if not BASE_URL:
        raise SystemExit('BASE_URL is not set - edit the user configurable part first.')

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    for href in iter_recent_files(response.text, CUTOFF_DATE):
        file_url = urljoin(BASE_URL, href)
        # basename() blocks path traversal from a crafted listing; unquote()
        # turns percent-escapes (e.g. %20) back into the real file name.
        local_path = os.path.join(DOWNLOAD_DIR, os.path.basename(unquote(href)))
        download_file(file_url, local_path)
        print(f"Downloaded: {href}")


if __name__ == '__main__':
    main()