""" Trendyol Best Seller Scraper - Backend Integration Veritabanından kategorileri alıp otomatik çeker """ import requests import json import time import math import os from typing import Dict, List, Any, Optional from datetime import datetime from logging_config import get_logger log = get_logger("scraper") class TrendyolScraper: """Trendyol API'den best seller ürünlerini çeker""" API_BASE_URL = "https://apigw.trendyol.com/discovery-sfint-browsing-service/api/top-rankings/top-ranking-contents" def __init__(self, category_id: int, page_size: int = 20): """ Args: category_id: Trendyol kategori ID page_size: Sayfa başına ürün sayısı (max 20) """ self.category_id = category_id self.page_size = min(page_size, 20) self.headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Accept": "application/json", "Referer": "https://www.trendyol.com/" } def fetch_page(self, page: int) -> Optional[Dict[str, Any]]: """Tek sayfa çeker""" params = { "categoryId": self.category_id, "rankingType": "bestSeller", "webGenderId": 1, "page": page, "pageSize": self.page_size, "channelId": 1, "storefrontId": 1, "language": "tr", "countryCode": "TR" } try: response = requests.get( self.API_BASE_URL, params=params, headers=self.headers, timeout=10 ) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log.warning(f"Sayfa {page} error: {e}") return None def get_total_count(self) -> int: """Toplam ürün sayısını öğrenir""" data = self.fetch_page(page=1) if not data or not data.get('isSuccess'): return 0 return data.get('totalCount', 0) def calculate_total_pages(self, total_count: int, max_pages: int = None) -> int: """Kaç sayfa çekeceğimizi hesaplar""" total_pages = math.ceil(total_count / self.page_size) # Max sayfa limiti varsa uygula if max_pages: total_pages = min(total_pages, max_pages) return total_pages def fetch_all_products(self, delay: float = 1.0, max_pages: int = 5) -> List[Dict[str, Any]]: """ Ürünleri çeker Args: delay: İstekler arası bekleme süresi max_pages: Maksimum sayfa sayısı (default: 5 = 100 ürün) Returns: Ürün listesi """ # Toplam ürün sayısını öğren total_count = self.get_total_count() if total_count == 0: return [] # Sayfa sayısını hesapla total_pages = self.calculate_total_pages(total_count, max_pages) log.info(f"Kategori {self.category_id}: {total_count} ürün, {total_pages} sayfa çekilecek") # Sayfaları çek all_products = [] for page in range(1, total_pages + 1): data = self.fetch_page(page) if not data or not data.get('isSuccess'): log.warning(f"Sayfa {page} atlandı") continue products = data.get('products', []) all_products.extend(products) # Rate limiting if page < total_pages: time.sleep(delay) return all_products def save_to_json(self, products: List[Dict[str, Any]], filename: str) -> bool: """ JSON dosyasına kaydeder Args: products: Ürün listesi filename: Dosya yolu Returns: Başarılı mı? """ try: # Dizin yoksa oluştur os.makedirs(os.path.dirname(filename), exist_ok=True) output = { "scraped_at": datetime.now().isoformat(), "category_id": self.category_id, "total_products": len(products), "products": products } with open(filename, 'w', encoding='utf-8') as f: json.dump(output, f, ensure_ascii=False, indent=2) return True except Exception as e: log.error(f"Dosya kaydetme hatası: {e}") return False def get_category_info(self) -> Optional[Dict[str, Any]]: """Kategori bilgilerini döndürür""" data = self.fetch_page(page=1) if not data or not data.get('isSuccess'): return None return data.get('categoryInfo', {}) class TrendyolSearchScraper: """Trendyol Search API ile ürün çeker — tüm kategori tipleri için çalışır (-c ve -s)""" API_BASE_URL = "https://apigw.trendyol.com/discovery-sfint-search-service/api/search/products" def __init__(self, path_model: str, page_size: int = 24): self.path_model = path_model self.page_size = page_size self.headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Accept": "application/json", "Referer": f"https://www.trendyol.com/{path_model}", "Origin": "https://www.trendyol.com" } self.cookies = { "storefrontId": "1", "language": "tr", "countryCode": "TR" } def fetch_page(self, page: int) -> Optional[Dict[str, Any]]: """Tek sayfa çeker""" params = { "pathModel": self.path_model, "pi": page, "ps": self.page_size, "channelId": 1, "storefrontId": 1, "culture": "tr-TR" } try: response = requests.get( self.API_BASE_URL, params=params, headers=self.headers, cookies=self.cookies, timeout=15 ) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log.warning(f"Search API sayfa {page} error ({self.path_model}): {e}") return None def fetch_all_products(self, delay: float = 1.0, max_pages: int = 10) -> List[Dict[str, Any]]: """Tüm ürünleri çeker, normalize eder (max_pages=10 x page_size=24 = 240 ürün)""" first = self.fetch_page(1) if not first: return [] total_raw = first.get("total") or first.get("totalCount") or first.get("roughTotal") or 0 try: total = int(total_raw) except (ValueError, TypeError): total = 0 raw_products = first.get("products", []) if total == 0 and not raw_products: return [] # total 0 olsa bile ürün varsa en az 1 sayfa çek if total == 0 and raw_products: total = len(raw_products) total_pages = min(math.ceil(total / self.page_size), max_pages) log.info(f"Search API {self.path_model}: {total} ürün, {total_pages} sayfa çekilecek") for page in range(2, total_pages + 1): data = self.fetch_page(page) if data and data.get("products"): raw_products.extend(data["products"]) if page < total_pages: time.sleep(delay) return [_normalize_search_product(p) for p in raw_products] def _normalize_search_product(raw: dict) -> dict: """Search API ürün formatını mevcut sisteme uyumlu hale getir""" brand = raw.get("brand", {}) if isinstance(brand, str): brand = {"name": brand} price = raw.get("price", {}) if isinstance(price, (int, float)): price = {"sellingPrice": price, "originalPrice": price} elif isinstance(price, dict) and "sellingPrice" not in price: # Search API returns current/discountedPrice/originalPrice — map to sellingPrice price["sellingPrice"] = price.get("discountedPrice") or price.get("current") or price.get("originalPrice") or price.get("old") or 0 rating = raw.get("ratingScore", {}) if rating is None: rating = {} return { "id": raw.get("id") or raw.get("contentId"), "name": raw.get("name", ""), "brand": brand, "price": price, "ratingScore": rating, "url": raw.get("url", ""), "imageUrl": raw.get("image", raw.get("imageUrl", "")), "merchantListings": raw.get("merchantListings", []), "winnerVariant": raw.get("winnerVariant", {}), "socialProofs": raw.get("socialProofs", []), "categoryId": raw.get("categoryId"), "categoryName": raw.get("categoryName"), } def scrape_category(category_id: int, category_name: str, output_dir: str = "../categories") -> Dict[str, Any]: """ Tek bir kategoriyi çeker Args: category_id: Trendyol kategori ID category_name: Kategori adı output_dir: JSON dosyalarının kaydedileceği dizin Returns: Scraping sonuçları """ result = { "category_id": category_id, "category_name": category_name, "success": False, "total_products": 0, "file_path": None, "error": None } try: # Scraper oluştur scraper = TrendyolScraper(category_id=category_id, page_size=20) # Ürünleri çek (max 5 sayfa = 100 ürün) products = scraper.fetch_all_products(delay=1.0, max_pages=5) if not products: result["error"] = "No products found" return result # JSON'a kaydet filename = f"{output_dir}/{category_name}_{category_id}.json" success = scraper.save_to_json(products, filename) if success: result["success"] = True result["total_products"] = len(products) result["file_path"] = filename else: result["error"] = "Failed to save JSON" except Exception as e: result["error"] = str(e) return result def scrape_multiple_categories(categories: List[tuple], delay: float = 2.0) -> Dict[str, Any]: """ Birden fazla kategoriyi çeker Args: categories: [(category_id, category_name), ...] listesi delay: Kategoriler arası bekleme süresi Returns: Genel sonuçlar """ results = { "scraped_at": datetime.now().isoformat(), "total_categories": len(categories), "successful": 0, "failed": 0, "total_products": 0, "details": [] } for i, (cat_id, cat_name) in enumerate(categories, 1): log.info(f"[{i}/{len(categories)}] {cat_name} (ID: {cat_id})") result = scrape_category(cat_id, cat_name) results["details"].append(result) if result["success"]: results["successful"] += 1 results["total_products"] += result["total_products"] log.info(f"Başarılı: {result['total_products']} ürün") else: results["failed"] += 1 log.error(f"Hata: {result['error']}") # Kategoriler arası bekleme if i < len(categories): time.sleep(delay) return results