Ce script fait partie des projets Python de Hyperskill.
J'ai fait toute la formation Python de Hyperskill pendant le confinement de 2020.
| import sys | |
| import os | |
| from collections import deque | |
| from bs4 import BeautifulSoup | |
| import requests | |
| from colorama import Fore | |
# Raw command-line arguments; the first user-supplied one names the directory
# in which fetched pages are saved.
args = sys.argv
if len(args) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError
    # traceback on the next line instead of an actionable message.
    sys.exit("Usage: pass the directory for saved pages as the first argument")
folder = args[1]  # e.g. "Yolo"

# URLs of the pages fetched so far, most recent last.
history = deque()

# HTML tags whose text content is kept when a page is rendered.
list_tag = ["p", "h1", "h2", "h3", "h4", "h5", "h6", "a", "ul", "ol", "li"]
def validate_url(_url):
    """Normalise user input into a fetchable URL and a short file name.

    Args:
        _url: Text typed by the user, with or without a leading
            ``https://`` / ``http://`` scheme.

    Returns:
        A ``(url, name)`` tuple. ``url`` is the input itself when it already
        starts with a scheme, otherwise the input prefixed with ``https://``.
        ``name`` is everything between the scheme and the first dot.
        Returns ``(False, None)`` when the input has no scheme and contains
        no dot, i.e. it does not look like a URL.
    """
    for scheme in ("https://", "http://"):
        if _url.startswith(scheme):
            # Slice the prefix off: str.strip(scheme) would treat the scheme
            # as a *set of characters* and also eat leading/trailing
            # 'h', 't', 'p', 's', ':' and '/' from the host itself
            # (e.g. "stackoverflow" -> "tackoverflow").
            return _url, _url[len(scheme):].split(".")[0]
    if "." in _url:
        return f"https://{_url}", _url.split(".")[0]
    return False, None
# Emit colorama's blue foreground code so everything printed below is blue.
print(Fore.BLUE)

# Seconds to wait on a server before giving up, so a dead host cannot hang
# the prompt forever.
REQUEST_TIMEOUT = 10
# Translation table deleting tabs, carriage returns and newlines in one pass.
_WHITESPACE_TABLE = str.maketrans("", "", "\t\r\n")

# Interactive loop: read a command or URL, fetch and render the page, save the
# rendered text under `folder`, and print it. Commands: "exit" and "back".
while True:
    # Checked on every iteration so the directory is recreated if it was
    # removed while the program is running.
    if not os.path.exists(folder):
        os.mkdir(folder)

    user_input = input("Url ?")
    if user_input == "exit":
        sys.exit(0)

    going_back = user_input == "back"
    if going_back:
        # The top of `history` is the page currently shown; going back needs
        # at least one more entry underneath it.
        if len(history) < 2:
            print("Error: No previous page")
            continue
        history.pop()  # drop the current page
        user_input = history[-1]  # previous page becomes the current one

    url, filename = validate_url(user_input)
    print(url, filename)

    if url:
        try:
            req = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as error:
            # A network failure should not end the session.
            print(f"Error: {error}")
            continue

        # Record the page only after a successful fetch, and not when going
        # back: the URL is already the top of the history in that case, and
        # appending it again would make "back" loop on the same page.
        if not going_back:
            history.append(url)

        soup = BeautifulSoup(req.content, "html.parser")
        text = []
        for tag in soup.find_all(list_tag):
            content = tag.get_text()
            if content != "\n":
                text.append(content.translate(_WHITESPACE_TABLE))
        page = "\n".join(text)

        # Explicit UTF-8 so pages with characters outside the platform's
        # default encoding can still be saved.
        with open(f"{folder}/{filename}", "w", encoding="utf-8") as file:
            file.write(page)
        print(page)
        continue

    # Not a URL: treat the input as the short name of a previously saved page.
    try:
        with open(f"{folder}/{user_input}", "r", encoding="utf-8") as file:
            print(file.read())
    except OSError:
        # Missing file, or the path resolves to a directory (empty input).
        print("Error: Incorrect URL")
cyvax - 2025