"""Command-line interface for The Data Packet."""
import argparse
import sys
from pathlib import Path
from the_data_packet.core import (
ConfigurationError,
get_config,
get_logger,
setup_logging,
)
from the_data_packet.workflows import PodcastPipeline
[docs]
def main() -> None:
"""Main CLI function for podcast generation."""
parser = argparse.ArgumentParser(
description="Generate automated tech news podcasts",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Generate complete podcast with environment variables
the-data-packet --output ./episode
# Generate script only with custom API keys
the-data-packet --anthropic-key sk-ant-... --script-only --output ./scripts
# Generate with custom show name and voices
the-data-packet --show-name "Tech Brief" --voice-a charon --voice-b aoede
# Generate from specific sources and categories
the-data-packet --sources wired techcrunch --categories security ai --output ./multi-source
Environment Variables:
ANTHROPIC_API_KEY - Claude API key for script generation
GCS_BUCKET_NAME - Google Cloud Storage bucket for audio synthesis
GOOGLE_APPLICATION_CREDENTIALS - Path to Google Cloud service account JSON (optional)
ELEVENLABS_API_KEY - [DEPRECATED] ElevenLabs API key (use Google Cloud TTS instead)
S3_BUCKET_NAME - S3 bucket for uploads (optional)
AWS_ACCESS_KEY_ID - AWS access key (optional)
AWS_SECRET_ACCESS_KEY - AWS secret key (optional)
MONGODB_USERNAME - MongoDB username for episode tracking (optional)
MONGODB_PASSWORD - MongoDB password for episode tracking (optional)
""",
)
# API Keys
parser.add_argument(
"--anthropic-key",
help="Anthropic API key (overrides ANTHROPIC_API_KEY env var)",
)
parser.add_argument(
"--gcs-bucket",
help="Google Cloud Storage bucket for audio synthesis (overrides GCS_BUCKET_NAME env var)",
)
parser.add_argument(
"--google-credentials",
help="Path to Google Cloud service account JSON (overrides GOOGLE_APPLICATION_CREDENTIALS env var)",
)
parser.add_argument(
"--elevenlabs-key",
help="[DEPRECATED] ElevenLabs API key (overrides ELEVENLABS_API_KEY env var)",
)
parser.add_argument(
"--mongodb-username",
help="MongoDB username for episode tracking and article deduplication (overrides MONGODB_USERNAME env var)",
)
parser.add_argument(
"--mongodb-password",
help="MongoDB password for episode tracking and article deduplication (overrides MONGODB_PASSWORD env var)",
)
# Content Options
parser.add_argument(
"--sources",
nargs="+",
default=["wired"],
choices=["wired", "techcrunch"],
help="Article sources to use (default: wired)",
)
parser.add_argument(
"--categories",
nargs="+",
default=["security", "ai"],
help="Article categories to fetch (default: security ai)",
)
parser.add_argument(
"--max-articles",
type=int,
default=1,
help="Maximum articles per source (default: 1)",
)
# Generation Options
parser.add_argument(
"--script-only",
action="store_true",
help="Generate script only (skip audio)",
)
parser.add_argument(
"--audio-only",
action="store_true",
help="Generate audio only (requires existing script)",
)
# Audio Settings
parser.add_argument(
"--male-voice",
default="en-US-Studio-Q",
help="Google Cloud TTS voice name for first speaker (default: en-US-Studio-Q - Alex)",
)
parser.add_argument(
"--female-voice",
default="en-US-Studio-O",
help="Google Cloud TTS voice name for second speaker (default: en-US-Studio-O - Sam)",
)
# Output Settings
parser.add_argument(
"--output",
type=Path,
default=Path("./output"),
help="Output directory (default: ./output)",
)
parser.add_argument(
"--show-name",
default="The Data Packet",
help="Name of your podcast show (default: The Data Packet)",
)
# S3 Upload
parser.add_argument(
"--s3-bucket",
help="S3 bucket for uploads (overrides S3_BUCKET_NAME env var)",
)
parser.add_argument(
"--no-s3",
action="store_true",
help="Disable S3 uploads even if configured",
)
# Other Options
parser.add_argument(
"--log-level",
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Log level (default: INFO)",
)
parser.add_argument(
"--save-intermediate",
action="store_true",
help="Save intermediate files",
)
args = parser.parse_args()
# Validate argument combinations
if args.script_only and args.audio_only:
print("Error: Cannot use both --script-only and --audio-only", file=sys.stderr)
sys.exit(1)
# Set up logging
setup_logging(args.log_level)
logger = get_logger(__name__)
try:
# Build configuration from arguments
config_overrides = {
"show_name": args.show_name,
"output_directory": args.output,
"article_sources": args.sources,
"article_categories": args.categories,
"max_articles_per_source": args.max_articles,
"male_voice": args.male_voice,
"female_voice": args.female_voice,
"save_intermediate_files": args.save_intermediate,
"log_level": args.log_level,
}
# API keys
if args.anthropic_key:
config_overrides["anthropic_api_key"] = args.anthropic_key
if args.gcs_bucket:
config_overrides["gcs_bucket_name"] = args.gcs_bucket
if args.google_credentials:
config_overrides["google_credentials_path"] = args.google_credentials
if args.elevenlabs_key:
config_overrides["elevenlabs_api_key"] = args.elevenlabs_key
if args.mongodb_username:
config_overrides["mongodb_username"] = args.mongodb_username
if args.mongodb_password:
config_overrides["mongodb_password"] = args.mongodb_password
if args.s3_bucket:
config_overrides["s3_bucket_name"] = args.s3_bucket
# Generation options
if args.script_only:
config_overrides.update(
{
"generate_script": True,
"generate_audio": False,
}
)
elif args.audio_only:
config_overrides.update(
{
"generate_script": False,
"generate_audio": True,
}
)
else:
config_overrides.update(
{
"generate_script": True,
"generate_audio": True,
}
)
# Disable S3 if requested
if args.no_s3:
config_overrides["s3_bucket_name"] = None
# Get configuration with overrides
config = get_config(**config_overrides)
logger.info(f"Starting {config.show_name} generation")
logger.info(f"Sources: {config.article_sources}")
logger.info(f"Categories: {config.article_categories}")
logger.info(f"Output: {config.output_directory}")
# Run pipeline
pipeline = PodcastPipeline(config)
result = pipeline.run()
# Report results
if result.success:
print("ā
Podcast generation completed successfully!")
print(f"ā±ļø Execution time: {result.execution_time_seconds:.1f} seconds")
print(f"š° Articles collected: {result.number_of_articles_collected}")
if result.script_generated and result.script_path:
print(f"š Script saved: {result.script_path}")
if result.s3_script_url:
print(f"š Script URL: {result.s3_script_url}")
if result.audio_generated and result.audio_path:
print(f"š§ Audio saved: {result.audio_path}")
if result.s3_audio_url:
print(f"š Audio URL: {result.s3_audio_url}")
else:
print("ā Podcast generation failed!", file=sys.stderr)
print(f"ā±ļø Execution time: {result.execution_time_seconds:.1f} seconds")
print(f"ā Error: {result.error_message}", file=sys.stderr)
sys.exit(1)
except ConfigurationError as e:
logger.error(f"Configuration error: {e}")
print(f"ā Configuration error: {e}", file=sys.stderr)
sys.exit(1)
except KeyboardInterrupt:
logger.info("Interrupted by user")
print("\\nā¹ļø Generation cancelled by user")
sys.exit(130)
except Exception as e:
logger.error(f"Unexpected error: {e}")
print(f"ā Unexpected error: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()