Source code for the_data_packet.utils.http

"""HTTP client utility."""

from typing import Any, Optional

import requests
from bs4 import BeautifulSoup

from the_data_packet.core.exceptions import NetworkError
from the_data_packet.core.logging import get_logger

logger = get_logger(__name__)


class HTTPClient:
    """Simple HTTP client with error handling and configuration.

    Wraps a single ``requests.Session`` that carries a fixed set of default
    headers. The client can be used as a context manager so the session is
    closed when the caller is done with it::

        with HTTPClient() as client:
            soup = client.get_soup(url)
    """

    def __init__(
        self, timeout: Optional[int] = None, user_agent: Optional[str] = None
    ):
        """
        Initialize HTTP client.

        Args:
            timeout: Request timeout (defaults to config)
            user_agent: User agent string (defaults to config)
        """
        # Imported inside the method rather than at module level
        # NOTE(review): presumably to avoid a circular import - confirm.
        from ..core.config import get_config

        config = get_config()

        # ``or`` means any falsy argument (None, 0, "") falls back to the
        # configured value, not only None.
        self.timeout = timeout or config.http_timeout
        self.user_agent = user_agent or config.user_agent

        # Create session
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": self.user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
                "Accept-Encoding": "gzip, deflate",
                "Connection": "keep-alive",
            }
        )

    def __enter__(self) -> "HTTPClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info: Any) -> None:
        """Close the session on leaving the ``with`` block.

        Returns None, so an exception raised inside the block propagates.
        """
        self.close()

    def close(self) -> None:
        """Close the underlying ``requests.Session``."""
        self.session.close()

    def get(self, url: str, **kwargs: Any) -> requests.Response:
        """
        Make a GET request.

        Args:
            url: URL to fetch
            **kwargs: Additional arguments passed to the session's ``get``;
                ``timeout`` defaults to the client's timeout when not given

        Returns:
            Response object

        Raises:
            NetworkError: If the request fails or the response has an error
                status (``raise_for_status``)
        """
        # Only fill in the timeout when the caller did not pass one.
        kwargs.setdefault("timeout", self.timeout)

        try:
            logger.debug(f"GET {url}")
            response = self.session.get(url, **kwargs)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            # Chain explicitly so the original requests error is kept as
            # ``__cause__`` of the NetworkError.
            raise NetworkError(f"HTTP request failed for {url}: {e}") from e

    def get_soup(self, url: str, **kwargs: Any) -> BeautifulSoup:
        """
        Get a URL and return parsed HTML.

        Args:
            url: URL to fetch
            **kwargs: Additional arguments passed to get()

        Returns:
            BeautifulSoup object built from the response body with the
            ``html.parser`` parser

        Raises:
            NetworkError: If request fails
        """
        response = self.get(url, **kwargs)
        # Pass the raw bytes (``content``), not decoded text, to the parser.
        return BeautifulSoup(response.content, "html.parser")