Initial commit

2025-06-27 15:15:17 +03:00
commit 780ff1d586
7 changed files with 110 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+.venv/
+downloaded_files/
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/apacheWFSSelectiveDownload.iml
+++ b/.idea/apacheWFSSelectiveDownload.iml
@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12 (apacheWFSSelectiveDownload)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (apacheWFSSelectiveDownload)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/apacheWFSSelectiveDownload.iml" filepath="$PROJECT_DIR$/.idea/apacheWFSSelectiveDownload.iml" />
+    </modules>
+  </component>
+</project>
--- a/main.py
+++ b/main.py
@ -0,0 +1,69 @@
+# =============================================================================
+# apacheWFSSelectiveDownload (Apache Web File Server Selective Download)
+#
+# Simple, date-selective download script for an Apache web file server.
+#
+# Copyright (c) 2025 VELENDEU, eetnaviation
+#
+# https://velend.eu/
+# https://git.velend.eu/eetnaviation/ApacheWFSSelectiveDownload
+#
+# All rights reserved unless otherwise stated.
+#
+# Permission is hereby denied to copy, modify, distribute, sublicense,
+# or sell copies of this software without explicit prior written consent.
+#
+# All dependency trademarks and names if included are subject to copyright
+# of their respective owners.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# =============================================================================
+
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+from datetime import datetime, timedelta
+import os
+
+##################### BEGIN USER CONFIGURABLE PART #####################
+
+BASE_URL = ''  # URL of the Apache directory listing page to download from. Empty by default: set it before running.
+DOWNLOAD_DIR = './downloaded_files' # Local directory that receives the downloaded files. Default is ./downloaded_files
+CUTOFF_DATE = datetime.now() - timedelta(days=30) # Oldest accepted "last modified" time (naive local time). Only files modified at or after this moment are downloaded; with days=30, files last modified more than 30 days ago are NOT downloaded.
+
+###################### END USER CONFIGURABLE PART ######################
+
+os.makedirs(DOWNLOAD_DIR, exist_ok=True)
+
+# Timestamp layouts seen in the "Last modified" column of Apache directory
+# listings: ISO style (2025-06-27 15:15) and the older day-month-year style
+# (27-Jun-2025 15:15). Rows whose date matches neither layout are skipped.
+_LISTING_DATE_FORMATS = ('%Y-%m-%d %H:%M', '%d-%b-%Y %H:%M')
+
+# Seconds to wait on the server before giving up, so that a stalled
+# connection cannot hang the script forever.
+_REQUEST_TIMEOUT = 30
+
+
+def _parse_listing_date(text):
+    """Return the datetime parsed from a listing date cell, or None.
+
+    Each layout in _LISTING_DATE_FORMATS is tried in order; None means the
+    text matched none of them.
+    """
+    for fmt in _LISTING_DATE_FORMATS:
+        try:
+            return datetime.strptime(text, fmt)
+        except ValueError:
+            continue
+    return None
+
+
+def _local_name_from_href(href):
+    """Return a safe local file name for a listing href, or None to skip it.
+
+    The href comes from the remote server, so it is not trusted: the query
+    string and fragment are dropped, %XX escapes are decoded, and only the
+    final path component is kept. This prevents absolute or '../' hrefs from
+    writing outside DOWNLOAD_DIR. Directory links (trailing '/'), sort links
+    ('?C=N;O=D') and empty hrefs all yield None.
+    """
+    from urllib.parse import unquote, urlsplit
+
+    name = os.path.basename(unquote(urlsplit(href).path))
+    if name in ('', '.', '..') or '\x00' in name:
+        return None
+    return name
+
+
+def _download(file_url, local_path):
+    """Stream file_url into local_path; return True on success, else False.
+
+    Data is written to a temporary '.part' file that is renamed only after
+    the transfer completes, so an interrupted download never leaves a
+    truncated file under the final name. HTTP error responses are treated
+    as failures instead of being saved as file content.
+    """
+    partial_path = local_path + '.part'
+    try:
+        with requests.get(file_url, stream=True, timeout=_REQUEST_TIMEOUT) as r:
+            r.raise_for_status()
+            with open(partial_path, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        os.replace(partial_path, local_path)
+    except (requests.RequestException, OSError) as exc:
+        print(f"Failed: {file_url} ({exc})")
+        try:
+            os.remove(partial_path)
+        except OSError:
+            pass  # No partial file was created, or it cannot be removed.
+        return False
+    return True
+
+
+if not BASE_URL:
+    raise SystemExit('BASE_URL is empty: set it to the directory listing URL before running.')
+
+response = requests.get(BASE_URL, timeout=_REQUEST_TIMEOUT)
+response.raise_for_status()  # Do not parse an HTTP error page as a listing.
+soup = BeautifulSoup(response.text, 'html.parser')
+for row in soup.find_all('tr'):
+    cols = row.find_all('td')
+    # The name link is read from the second cell and the date from the third.
+    if len(cols) < 3:
+        continue
+    link_tag = cols[1].find('a')
+    if not link_tag:
+        continue
+    href = link_tag.get('href')
+    if not href:
+        continue
+    filename = _local_name_from_href(href)
+    if filename is None:
+        continue
+    file_date = _parse_listing_date(cols[2].text.strip())
+    if file_date is None or file_date < CUTOFF_DATE:
+        continue
+    # Resolve against the final URL of the listing (after any redirect), so
+    # a BASE_URL given without its trailing slash still yields correct links.
+    file_url = urljoin(response.url, href)
+    local_path = os.path.join(DOWNLOAD_DIR, filename)
+    if _download(file_url, local_path):
+        print(f"Downloaded: {filename}")