Initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.venv/
|
||||||
|
downloaded_files/
|
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
10
.idea/apacheWFSSelectiveDownload.iml
generated
Normal file
10
.idea/apacheWFSSelectiveDownload.iml
generated
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.12 (apacheWFSSelectiveDownload)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (apacheWFSSelectiveDownload)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/apacheWFSSelectiveDownload.iml" filepath="$PROJECT_DIR$/.idea/apacheWFSSelectiveDownload.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
69
main.py
Normal file
69
main.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# apacheWFSSelectiveDownload (Apache Web File Server Selective Download)
|
||||||
|
#
|
||||||
|
# Simple, date-selective download script for an Apache web file server.
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025 VELENDEU, eetnaviation
|
||||||
|
#
|
||||||
|
# https://velend.eu/
|
||||||
|
# https://git.velend.eu/eetnaviation/ApacheWFSSelectiveDownload
|
||||||
|
#
|
||||||
|
# All rights reserved unless otherwise stated.
|
||||||
|
#
|
||||||
|
# Permission is hereby denied to copy, modify, distribute, sublicense,
|
||||||
|
# or sell copies of this software without explicit prior written consent.
|
||||||
|
#
|
||||||
|
# All dependency trademarks and names if included are subject to copyright
|
||||||
|
# of their respective owners.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
import os
from datetime import datetime, timedelta
from urllib.parse import unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
##################### BEGIN USER CONFIGURABLE PART #####################

BASE_URL = ''  # URL of the Apache directory listing to download from
DOWNLOAD_DIR = './downloaded_files'  # Directory the downloaded files are saved into
# Oldest "last modified" timestamp that is still downloaded. With days=30,
# files last modified more than 30 days ago are NOT downloaded.
CUTOFF_DATE = datetime.now() - timedelta(days=30)

###################### END USER CONFIGURABLE PART ######################

# Seconds to wait for the server (connect / between reads) before giving up,
# so a stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Timestamp layout expected in the listing's date column, e.g. "05-Mar-2025 14:30".
LISTING_DATE_FORMAT = '%d-%b-%Y %H:%M'


def _local_filename(href):
    """Return a safe local file name for a listing href, or None to skip it.

    Only the last path component of the href is kept (query string and
    fragment dropped, percent-escapes decoded), so a listing entry can never
    make the script write outside DOWNLOAD_DIR. Directory links (path ending
    in '/') and empty/'.'/'..' names yield None.
    """
    if not href:
        return None
    name = os.path.basename(unquote(urlsplit(href).path))
    if name in ('', '.', '..'):
        return None
    return name


if not BASE_URL:
    raise SystemExit("BASE_URL is not set; edit the user configurable part of this script.")

os.makedirs(DOWNLOAD_DIR, exist_ok=True)

response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly instead of parsing an error page as if it were a listing.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Resolve relative hrefs against the URL that was actually served (after any
# redirects) rather than the configured one.
listing_url = response.url

for row in soup.find_all('tr'):
    cols = row.find_all('td')
    # Rows with fewer than three cells carry no name/date pair.
    if len(cols) < 3:
        continue

    link_tag = cols[1].find('a')
    if not link_tag:
        continue

    href = link_tag.get('href')
    filename = _local_filename(href)
    if filename is None:
        continue

    date_str = cols[2].text.strip()
    try:
        file_date = datetime.strptime(date_str, LISTING_DATE_FORMAT)
    except ValueError:
        # Date cell is not in the expected format; skip the row.
        continue

    if file_date < CUTOFF_DATE:
        continue

    file_url = urljoin(listing_url, href)
    local_path = os.path.join(DOWNLOAD_DIR, filename)

    # The context manager releases the streamed connection even on errors.
    with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        if not r.ok:
            # Do not save an HTTP error body under the file's name.
            print(f"Skipped (HTTP {r.status_code}): {filename}")
            continue
        with open(local_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

    print(f"Downloaded: {filename}")
|
Reference in New Issue
Block a user