"""Command-line tool that exports conversations from an LLMDatabase to JSONL."""

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path

from database import LLMDatabase
from proxy_addon import load_config


def export_training_data(output_path: str, db_path: str = "llm_data.db",
                         include_reasoning: bool = True) -> int:
    """Export the database contents to a JSONL file.

    This is a thin wrapper: the actual serialisation is delegated to
    ``LLMDatabase.export_to_jsonl``.

    Args:
        output_path: Destination file path, forwarded to ``export_to_jsonl``.
        db_path: Path of the database file to open.
        include_reasoning: Forwarded to ``export_to_jsonl`` unchanged.

    Returns:
        The value returned by ``export_to_jsonl`` (the CLI reports it as the
        number of exported conversations).
    """
    db = LLMDatabase(db_path)
    return db.export_to_jsonl(output_path, include_reasoning)


def export_with_metadata(output_path: str, db_path: str = "llm_data.db") -> int:
    """Export every non-empty conversation together with its metadata.

    Each output line is a JSON object with two keys: ``messages`` (whatever
    ``get_conversation_messages`` returns for the conversation) and
    ``metadata`` (``conversation_id``, ``created_at``, ``updated_at``).
    Conversations without messages are skipped.

    Args:
        output_path: Destination file path; the file is overwritten. The
            parent directory must already exist.
        db_path: Path of the database file to open.

    Returns:
        The number of conversations written to the file.
    """
    db = LLMDatabase(db_path)
    # Fetch the conversations before opening the output so that a database
    # failure cannot truncate an existing export file.
    conversations = db.get_all_conversations()

    written = 0
    with open(output_path, 'w', encoding='utf-8') as out:
        for conv in conversations:
            conversation_id = conv['conversation_id']
            messages = db.get_conversation_messages(conversation_id)
            if not messages:
                continue

            record = {
                'messages': messages,
                'metadata': {
                    'conversation_id': conversation_id,
                    'created_at': conv['created_at'],
                    'updated_at': conv['updated_at'],
                },
            }
            # ensure_ascii=False keeps non-ASCII text readable in the export.
            out.write(json.dumps(record, ensure_ascii=False) + '\n')
            written += 1

    return written


def _build_parser(default_db_path: str) -> argparse.ArgumentParser:
    """Create the argument parser for the export command line."""
    parser = argparse.ArgumentParser(description='Export LLM training data to JSONL format')
    parser.add_argument(
        '--output', '-o',
        type=str,
        # The timestamp is evaluated once, when the parser is built.
        default=f"exports/training_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl",
        help='Output file path',
    )
    parser.add_argument('--db', type=str, default=default_db_path, help='Database file path')
    parser.add_argument('--no-reasoning', action='store_true',
                        help='Exclude reasoning content from export')
    parser.add_argument('--with-metadata', action='store_true',
                        help='Include metadata in export')
    parser.add_argument('--stats', action='store_true', help='Show database statistics')
    return parser


def _print_stats(db_path: str) -> None:
    """Print the counters returned by ``LLMDatabase.get_stats``."""
    stats = LLMDatabase(db_path).get_stats()
    print("\nDatabase Statistics:")
    print(f" Conversations: {stats['conversations']}")
    print(f" Requests: {stats['requests']}")
    print(f" Responses: {stats['responses']}")
    print(f" Total Tokens: {stats['total_tokens']}")


def main() -> None:
    """Parse command-line arguments and run the requested export.

    The default database path comes from the ``database.path`` entry of the
    configuration returned by ``load_config`` (falling back to
    ``llm_data.db``). With ``--stats`` only the statistics are printed and no
    file is written.
    """
    config = load_config()
    db_config = config.get('database', {})

    args = _build_parser(db_config.get('path', 'llm_data.db')).parse_args()

    if args.stats:
        _print_stats(args.db)
        return

    output_path = Path(args.output)
    # Create the destination directory up front so the exporters can open
    # the file directly.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if args.with_metadata:
        if args.no_reasoning:
            # The metadata exporter takes no reasoning switch, so the flag
            # would otherwise be ignored without any feedback.
            print("Warning: --no-reasoning has no effect with --with-metadata",
                  file=sys.stderr)
        count = export_with_metadata(str(output_path), args.db)
        print(f"\nExported {count} conversations with metadata to: {output_path}")
        return

    include_reasoning = not args.no_reasoning
    count = export_training_data(str(output_path), args.db, include_reasoning)
    print(f"\nExported {count} conversations to: {output_path}")
    if include_reasoning:
        print(" (Reasoning content included)")
    else:
        print(" (Reasoning content excluded)")


if __name__ == '__main__':
    main()