Initializing...

Preparing the app. This may take a moment.

Still loading… your network may be slow or a script is blocked.

TheDoor.py

Author:

| Size: 378.30 KB

|

#!/bin/python

# -*- coding: utf-8 -*-

import argparse
import calendar
import csv
import glob
import hashlib
import html
import importlib
import itertools
import os
import platform
import queue
import random
import re
import shutil
import signal
import subprocess
import sys
import textwrap
import threading
import time
import urllib.request
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime, timedelta
from html.parser import HTMLParser
from io import StringIO
from pathlib import Path
from urllib.error import HTTPError, URLError

import requests

# List of User-Agent strings

# Desktop browser identities sampled by get_random_user_agent().
# Three platforms (Windows, macOS, Linux) for each of two browsers.
user_agents = [
    # Chrome
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # Firefox
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
]

# List of required third-party modules

# Third-party packages the script depends on. check_and_install_modules()
# uses each entry both as the import name it probes and as the argument it
# passes to `pip install`, so an entry must be valid for both purposes.
third_party_modules = [
    'chardet',
    'pygame',
    'bs4',  # BeautifulSoup (part of bs4 package)
    'tqdm',
    'PySimpleGUI',
    'colorama',
]

def _apt_install(package, label):
    """Best-effort ``sudo apt install`` of *package*; print a message on failure.

    *label* is the human-readable name used in the failure message.
    """
    try:
        subprocess.check_call(
            ["sudo", "apt", "-qq", "-y", "install", package],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        print(f"Failed to install {label}. Ensure you have sudo privileges.")
    except OSError:
        # check_call raises FileNotFoundError (an OSError) when `sudo` itself
        # cannot be executed, e.g. in a minimal container. Treat that like any
        # other failed install rather than crashing.
        print(f"Failed to install {label}. 'sudo apt' is not available on this system.")


def check_and_install_modules():
    """Make sure system prerequisites and third-party packages are present.

    Steps, all best-effort (failures are printed, never raised):

    * Linux: install ``python3-pip`` and ``python3-tk`` through ``sudo apt``.
    * macOS: check that ``tkinter`` is importable and print guidance if not.
    * Windows / other: no system-level step.
    * All platforms: for every name in ``third_party_modules``, try to import
      it and, if that fails, run ``<python> -m pip install <name>``.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    os_name = platform.system()

    if os_name == "Linux":
        # Install pip if not already installed
        _apt_install("python3-pip", "pip")
        # Install python3-tk
        _apt_install("python3-tk", "python3-tk")

    elif os_name == "Darwin":  # macOS
        # Check if Tkinter is available
        try:
            import tkinter  # noqa: F401  (imported only to probe availability)
            print("Tkinter is available.")
        except ImportError:
            print("Tkinter is not available. Please install it manually or ensure your Python installation includes Tkinter.")
            print("You might need to reinstall Python with Tkinter support. For example, using Homebrew:")
            # Homebrew removed formula options such as --with-tcl-tk; Tk
            # bindings now ship as the separate `python-tk` formula.
            print("brew install python-tk")

    # For Windows, we'll rely on pip for Python packages
    # Note: System packages like tkinter should be pre-installed or installed manually

    for module in third_party_modules:
        try:
            importlib.import_module(module)
            print(f"{module} is already installed.")
        except ImportError:
            print(f"{module} is not installed.")
            pip_command = [sys.executable, '-m', 'pip', 'install', module]
            try:
                subprocess.check_call(pip_command)
            except (subprocess.CalledProcessError, OSError):
                print(f"Failed to install {module}.")
            else:
                # The failed import above may have cached a stale view of
                # site-packages; refresh so a later import in this same
                # process can see the freshly installed package.
                importlib.invalidate_caches()
                print(f"{module} installed successfully.")

def import_modules():
    """Import the third-party packages and publish them as module globals.

    The imports are done inside a function (rather than at the top of the
    file) so they can be deferred until the caller chooses to run them. The
    ``global`` statement makes the imported names visible to the rest of the
    module. Also creates the module-wide ``stop_flag`` event.

    Raises:
        ImportError: if any of the packages is not installed.
    """
    # Your existing import logic here
    global chardet, concurrent, pygame, BeautifulSoup, Pool, tqdm, stop_flag, sg

    import chardet
    import concurrent.futures
    import pygame
    from bs4 import BeautifulSoup
    from multiprocessing import Pool
    from tqdm import tqdm
    import PySimpleGUI as sg

    stop_flag = threading.Event()

    # NOTE(review): Fore, Style and init are not listed in the `global`
    # statement above, so they are bound only locally and discarded when this
    # function returns. If other code in this file uses them, they need to be
    # added to `global` (after checking for name clashes) - TODO confirm.
    from colorama import Fore, Style, init

# Function to get a random User-Agent

def get_random_user_agent():
    """Return one entry of ``user_agents``, picked with ``random.choice``."""
    picked = random.choice(user_agents)
    return picked

# Define global variables and directories

# --- Module-wide state -------------------------------------------------------
failed_downloads = []  # presumably collects downloads that failed - verify against users
verbose = "-v" in sys.argv  # True when "-v" appears anywhere on the command line
edgar_url = "https://www.sec.gov/Archives/edgar/data/"

# NOTE(review): "[email protected]" looks like an e-mail address redacted by the
# page this file was copied from, not a real value. Confirm the intended
# contact address before sending requests with these headers.
headers = {'User-Agent': "anonymous/[email protected]"}
backup_headers = {"User-Agent": "anonymost/[email protected]"}

files_found_count = 0
done = False

# --- Working directories -----------------------------------------------------
# expanduser("~") resolves to the current user's home directory. The previous
# expanduser("") was a no-op that silently made both paths relative to the
# current working directory.
_home_dir = os.path.expanduser("~")
download_directory = os.path.join(_home_dir, "sec_archives")
download_directory2 = os.path.join(_home_dir, "edgar")
base_path = download_directory2

# Create both directories up front; exist_ok avoids an error on re-runs.
os.makedirs(download_directory, exist_ok=True)
os.makedirs(download_directory2, exist_ok=True)

# Create a list of all subdirectories from 1993 to 2024, including all four quarters

# Building blocks for the per-quarter index URLs assembled below.
years = range(1993, 2025)  # upper bound is exclusive: covers 1993 through 2024
quarters = ["QTR1", "QTR2", "QTR3", "QTR4"]
base_url = "https://www.sec.gov/Archives/edgar/full-index"

subdirectories = [ f"{base_url}/{year}/{quarter}/master.zip" for year in years for quarter in quarters if not (year == 20…

Comments

No comments yet

Please complete the captcha

9/22/2024

Create new paste with same settings

Not all user-generated content is reviewed by AnonPaste. If you believe this paste violates our community guidelines or terms of service, please report it here.

Initializing...

Preparing the app. This may take a moment before the app is ready.

AnonPaste is a user-generated content hosting service. The platform and its operators are not responsible for content posted by users.