From ce1dc1e25fc244c8664cf948db8300f5875e0147 Mon Sep 17 00:00:00 2001 From: furkanyigit34 <134547018+furkanyigit34@users.noreply.github.com> Date: Sat, 28 Mar 2026 22:25:25 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20tek=20birle=C5=9Ftirilmi=C5=9F=20JSON?= =?UTF-8?q?=20yap=C4=B1s=C4=B1na=20ge=C3=A7i=C5=9F=20+=20sosyal=20kan?= =?UTF-8?q?=C4=B1t=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ne yaptık: - data_consolidator.py: Tüm normalizasyon ve hesaplama mantığını main.py'den çıkardık - Dashboard endpoint 1150 satırdan 25 satıra düştü (main.py -1730/+1880 net) - Enrichment bitince otomatik konsolide dosya oluşturuluyor (report_{id}_data.json) - Eski raporlar ilk dashboard isteğinde lazy migration ile konsolide ediliyor - Trendyol API artık order-count döndürmediği için baskets fallback eklendi - Inline socialProofs (scrape) > enrichment API öncelik sırası uygulandı - Frontend KPI başlıkları orders/baskets durumuna göre dinamik değişiyor - logging_config.py, category_seeder.py, alembic migration eklendi - Playwright ile 9 tab test edildi, tüm veriler doğru Neden yaptık: - 3 farklı kaynaktan her istekte birleştirme yapılması veri tutarsızlığına ve yavaşlığa yol açıyordu - Tek konsolide JSON dosyası ile dashboard anında yükleniyor - Trendyol API değişikliği nedeniyle sipariş verisi kayboluyordu, baskets fallback ile çözüldü Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 137 +- .../src/components/ReportDashboard.jsx | 16 +- .../dashboard-tabs/HiddenChampionsTab.jsx | 18 +- .../components/dashboard-tabs/OverviewTab.jsx | 36 +- backend/Dockerfile | 2 +- ...207dbbac44_add_path_model_to_categories.py | 30 + backend/analytics/champion_finder.py | 88 +- backend/analytics/metrics.py | 13 +- backend/category_seeder.py | 143 ++ backend/data_consolidator.py | 791 ++++++++ backend/database.py | 6 +- backend/google_trends_helper.py | 7 +- backend/logging_config.py | 197 ++ backend/main.py | 1728 ++++------------- 
backend/scraper.py | 125 +- 15 files changed, 1878 insertions(+), 1459 deletions(-) create mode 100644 backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py create mode 100644 backend/category_seeder.py create mode 100644 backend/data_consolidator.py create mode 100644 backend/logging_config.py diff --git a/CLAUDE.md b/CLAUDE.md index e72ed78..1c7294a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,12 +1,12 @@ # CLAUDE.md -Bu dosya Claude Code (claude.ai/code) için proje rehberidir. +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. ## Proje Özeti -**Trendyol Product Dashboard**: Trendyol e-ticaret platformu için kategori bazlı ürün analiz sistemi. 7 tab'lı dashboard, otomatik rapor oluşturma ve sosyal kanıt metrikleri. +**Trendyol Product Dashboard**: Trendyol e-ticaret platformu için kategori bazlı ürün analiz sistemi. 9 tab'lı dashboard, otomatik rapor oluşturma, sosyal kanıt metrikleri ve hidden champion analizi. -**Stack**: FastAPI + React 19 + Vite + SQLite + Tailwind CSS +**Stack**: FastAPI + React 19 + Vite + PostgreSQL + Tailwind CSS ## Geliştirme Komutları @@ -15,17 +15,33 @@ Bu dosya Claude Code (claude.ai/code) için proje rehberidir. 
python3 start.py # Manuel başlatma (iki terminal) -cd backend && python3 main.py # Terminal 1 - Backend -cd admin-panel && npm run dev # Terminal 2 - Frontend +cd backend && python3 main.py # Terminal 1 - Backend (port 8001) +cd admin-panel && npm run dev # Terminal 2 - Frontend (port 5173) # Dependency kurulumu cd backend && pip install -r requirements.txt # Python cd admin-panel && npm install # Node.js -# Diğer komutlar -cd admin-panel && npm run build # Frontend build -cd admin-panel && npm run lint # Lint -cd backend && python3 -c "from database import init_db; init_db()" # DB init +# Build & lint +cd admin-panel && npm run build # Frontend production build +cd admin-panel && npm run lint # ESLint + +# Backend testler +cd backend && pytest # Tüm testler +cd backend && pytest tests/test_cache.py # Tek test dosyası +cd backend && pytest tests/test_cache.py -k "test_ttl" # Tek test + +# Frontend E2E testler (Playwright) +cd admin-panel && npx playwright test # Tüm E2E testler +cd admin-panel && npx playwright test tests/rare-keywords.spec.js # Tek spec + +# Docker ile çalıştırma +./build-docker.sh && ./start-docker.sh # Build + start +./stop-docker.sh # Durdur + +# DB migration +cd backend && alembic upgrade head # Migration uygula +cd backend && alembic revision --autogenerate -m "description" # Yeni migration ``` **Erişim URL'leri**: @@ -39,23 +55,36 @@ cd backend && python3 -c "from database import init_db; init_db()" # DB init ### 3 Katmanlı Yapı ``` -React Frontend (admin-panel/) → FastAPI Backend (backend/) → SQLite + JSON -├── CategoryManagement.jsx ├── main.py (~4400 satır) ├── trendyol.db -├── ReportGeneration.jsx ├── database.py ├── categories/*.json -├── ReportList.jsx └── scraper.py └── reports/*.json -└── ReportDashboard.jsx (7 tab) +React Frontend (admin-panel/) → FastAPI Backend (backend/) → PostgreSQL + JSON +├── ReportDashboard.jsx (9 tab) ├── main.py (~5000 satır) ├── trendyol_db +├── ReportGeneration.jsx ├── database.py (ORM) ├── 
categories/*.json +├── ReportList.jsx ├── scraper.py └── reports/*.json +├── ReportComparison.jsx ├── google_trends_helper.py +└── CategoryManagement.jsx └── analytics/ + ├── metrics.py + └── champion_finder.py ``` -### Dashboard Tab'ları (7 adet) +### Frontend Routes +| Path | Component | Açıklama | +|------|-----------|----------| +| `/` veya `/report` | ReportGeneration | Yeni rapor oluştur | +| `/reports` | ReportList | Kayıtlı raporlar | +| `/reports/:reportId` | ReportDashboard | 9 tab'lı analiz dashboard | +| `/compare` | ReportComparison | Yan yana rapor karşılaştırma | + +### Dashboard Tab'ları (9 adet) | Tab ID | Tab Adı | Component | Açıklama | |--------|---------|-----------|----------| | overview | Genel Bakış | OverviewTab | KPI'lar, özet grafikler | | brand | Marka | BrandTab | Marka analizi, pazar payı | | category | Kategori | CategoryTab | Kategori dağılımı | | origin | Menşei | OriginTab | Ülke bazlı analiz | -| barcode | Barkod | BarcodeTab | Barkod veri analizi | -| keyword | Keyword Aracı | KeywordTab | Anahtar kelime analizi | +| barcode | Barkod | BarcodeTab | Barkod/GS1 menşei analizi | +| keyword | Keyword Aracı | KeywordTab | Anahtar kelime + Google Trends | | product-finder | Ürün Bulma | ProductFinderTab | Ürün arama/filtreleme | +| hidden-champions | Gizli Şampiyonlar | HiddenChampionsTab | Düşük yorum, yüksek puan fırsatları | +| opportunity | Fırsat Analizi | OpportunityTab | Pazar fırsat analizi | ### Veri Akışı @@ -77,12 +106,12 @@ React Frontend (admin-panel/) → FastAPI Backend (backend/) → SQLite + **Backend'den gelen hazır objeleri kullan, ham hesaplama YAPMA:** ```jsx -// ✅ DOĞRU - Hazır veriyi kullan +// DOĞRU - Hazır veriyi kullan const kpis = dashboardData?.kpis || {}; const topProducts = dashboardData?.charts?.top_products || []; const topBrands = dashboardData?.charts?.top_brands || []; -// ❌ YANLIŞ - all_products'tan hesaplama yapma +// YANLIŞ - all_products'tan hesaplama yapma const total = 
dashboardData?.all_products.reduce((sum, p) => sum + p.price, 0); ``` @@ -97,12 +126,11 @@ Frontend hesaplamalı veri, alan adı uyumsuzluğuna yol açabilir. Detay için: **Çözüm Pattern - Mapping Layer**: ```jsx -// Veriyi component beklentilerine dönüştür const transformed = sourceData.map(item => ({ - country: item.name, // Beklenen alana map'le - name: item.name, // Orijinali koru - count: item.productCount, // Beklenen alana map'le - productCount: item.productCount // Orijinali koru + country: item.name, + name: item.name, + count: item.productCount, + productCount: item.productCount })); ``` @@ -111,7 +139,7 @@ const transformed = sourceData.map(item => ({ 1. Tab config'i `src/constants/tabGroups.js`'e ekle 2. Tab component'ini `src/components/dashboard-tabs/` altına oluştur 3. `ReportDashboard.jsx`'te import et ve render bloğu ekle -4. **Her zaman veri dönüşümü için console.log ekle** +4. Gerekiyorsa backend'e yeni endpoint ekle (`main.py`) ## API Entegrasyonu @@ -123,15 +151,10 @@ const transformed = sourceData.map(item => ({ | ENRICHMENT | 120s | Sosyal kanıt zenginleştirme | | KEYWORD_ANALYSIS | 300s | Keyword analizi | -### Polling Pattern -```jsx -// Exponential backoff with jitter (1s → 5s max) -import { fetchWithTimeout, API_BASE_URL } from '../config/api'; -``` - -### Rate Limit -- Sosyal kanıt API: 2 istek/saniye -- Exponential backoff kullanılır (%75 istek azaltımı sağlandı) +### Rate Limit & Resilience +- Sosyal kanıt API: 2 istek/saniye (RateLimiter) +- Circuit breaker pattern for external API calls +- Exponential backoff with jitter (1s → 5s max) ## Kod Değişiklik Kuralları @@ -141,18 +164,45 @@ import { fetchWithTimeout, API_BASE_URL } from '../config/api'; - Uzun işlemler: BackgroundTasks + progress polling endpoint - Harici API çağrıları: Her zaman timeout parametresi ekle - Cache: BoundedCache kullan (asla sınırsız dict kullanma) +- Analytics hesaplamaları: `analytics/` modülüne koy (metrics.py, champion_finder.py) ### Frontend - 
`fetchWithTimeout` kullan (`src/config/api.js`'den) - Async işlemler için loading state göster - Eşzamanlı çağrılar için request deduplication uygula +- Grafikler: Recharts kullan, veri dönüşümü `utils/chartTransformers.js`'de +- Export: `utils/exportUtils.js` ile CSV/Excel ### CORS Değişiklikleri -Yeni frontend portları için `main.py`'deki CORS allowlist'e ekle (satır 34-45): +Yeni frontend portları için `main.py`'deki CORS allowlist'e ekle: ```python allow_origins=["http://localhost:5173", "http://localhost:5174", ...] ``` +## Database + +**Dev**: `postgresql://postgres:trendyol123@localhost:5433/trendyol_db` +**Docker**: `postgresql://postgres:trendyol123@postgres:5432/trendyol_db` + +Migrations: Alembic (`backend/alembic/`). Her schema değişikliğinde `alembic revision --autogenerate` çalıştır. + +| Model | Amaç | Anahtar Alanlar | +|-------|------|-----------------| +| Category | Hiyerarşik kategori ağacı | `parent_id` (self-ref), `trendyol_category_id` | +| Snapshot | Aylık veri görüntüleri | `category_id`, `json_file_path` | +| Report | Kayıtlı raporlar | `category_id`, `json_file_path` | +| EnrichmentError | API hata logları | `endpoint`, `error_type`, `status_code` | + +## Deployment + +**Platform**: Coolify + Docker Compose + Traefik reverse proxy + +Docker Compose servisleri: `postgres` (15-alpine), `backend` (FastAPI), `frontend` (Nginx) + +`startup.sh` sırası: PostgreSQL bağlantı bekle → Alembic migration → Kategori seeding → Uvicorn başlat + +Traefik SSE streaming desteği: 100ms flush interval (rapor progress için) + ## Kaynak Limitleri | Kaynak | Limit | @@ -163,26 +213,11 @@ allow_origins=["http://localhost:5173", "http://localhost:5174", ...] 
| Sosyal kanıt batch | 5 ürün/istek | | Rate limit | 2 istek/saniye (sosyal kanıt) | -## Kritik Dependency'ler - -**Backend**: FastAPI 0.104.1, SQLAlchemy 2.0.45, Uvicorn 0.24.0, Requests 2.31.0, Pytrends 4.9.2 - -**Frontend**: React 19.2.0, Vite 7.2.2, Recharts 3.4.1, Tailwind CSS 4.1.17, Axios 1.13.2 - -## Database Modelleri - -| Model | Amaç | Anahtar Alanlar | -|-------|------|-----------------| -| Category | Hiyerarşik kategori ağacı | `parent_id` (self-ref), `trendyol_category_id` | -| Snapshot | Aylık veri görüntüleri | `category_id`, `json_file_path` | -| Report | Kayıtlı raporlar | `category_id`, `json_file_path` | -| EnrichmentError | API hata logları | `endpoint`, `error_type`, `status_code` | - ## Dokümantasyon | Dosya | Amaç | |-------|------| -| docs/DASHBOARD_ARCHITECTURE.md | **Önemli** - Dashboard veri yapıları | +| docs/DASHBOARD_ARCHITECTURE.md | Dashboard veri yapıları ve KPI tanımları | | docs/bug-fixes/ORIGINTAB_BUG_FIX.md | **Kritik** - Alan adı uyumsuzluk pattern'i | | docs/API_DOCUMENTATION.md | Tam API referansı | | docs/ARCHITECTURE.md | Sistem mimarisi (Türkçe) | diff --git a/admin-panel/src/components/ReportDashboard.jsx b/admin-panel/src/components/ReportDashboard.jsx index 3ec35b8..420812c 100644 --- a/admin-panel/src/components/ReportDashboard.jsx +++ b/admin-panel/src/components/ReportDashboard.jsx @@ -99,17 +99,27 @@ function ReportDashboard() { const products = dashboardData.all_products const totalProducts = products.length - const totalOrders = products.reduce((sum, p) => sum + (p.orders || 0), 0) + const rawOrders = products.reduce((sum, p) => sum + (p.orders || 0), 0) + const totalBaskets = products.reduce((sum, p) => sum + (p.baskets || 0), 0) + // Trendyol API artık order-count döndürmüyor — orders > 0 ise onu, yoksa baskets'ı kullan + const totalOrders = rawOrders > 0 ? rawOrders : totalBaskets + const ordersLabel = rawOrders > 0 ? 
'orders' : 'baskets' const totalViews = products.reduce((sum, p) => sum + (p.page_views || 0), 0) + const totalFavorites = products.reduce((sum, p) => sum + (p.favorites || 0), 0) const avgPrice = products.reduce((sum, p) => sum + (p.price || 0), 0) / totalProducts - const totalRevenue = products.reduce((sum, p) => sum + ((p.price || 0) * (p.orders || 0)), 0) + const totalRevenue = rawOrders > 0 + ? products.reduce((sum, p) => sum + ((p.price || 0) * (p.orders || 0)), 0) + : products.reduce((sum, p) => sum + ((p.price || 0) * (p.baskets || 0)), 0) const kpis = { totalProducts, totalOrders, + totalBaskets, totalViews, + totalFavorites, avgPrice: Math.round(avgPrice), - totalRevenue: Math.round(totalRevenue) + totalRevenue: Math.round(totalRevenue), + ordersLabel } console.log('✅ [KPI] Calculated KPIs:', kpis) diff --git a/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx b/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx index dabf390..3931b5a 100644 --- a/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx +++ b/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx @@ -12,8 +12,8 @@ export default function HiddenChampionsTab({ reportId }) { // Filters const [minRating, setMinRating] = useState(4.0) const [maxReview, setMaxReview] = useState(100) - const [minOrders, setMinOrders] = useState(5) - const [sortKey, setSortKey] = useState('performance_score') + const [minOrders, setMinOrders] = useState(0) + const [sortKey, setSortKey] = useState('hidden_champion_score') const [sortDir, setSortDir] = useState('desc') const [showFilters, setShowFilters] = useState(false) @@ -41,9 +41,9 @@ export default function HiddenChampionsTab({ reportId }) { // Filtered & sorted products const filteredProducts = useMemo(() => { - if (!data?.products) return [] + if (!data?.hidden_champions) return [] - return data.products + return data.hidden_champions .filter(p => { const rating = p.rating || 0 const reviewCount = 
p.review_count || p.reviewCount || 0 @@ -230,10 +230,10 @@ export default function HiddenChampionsTab({ reportId }) { handleSort('performance_score')} + onClick={() => handleSort('hidden_champion_score')} >
- Skor + Skor
@@ -287,13 +287,13 @@ export default function HiddenChampionsTab({ reportId }) { = 70 + (product.hidden_champion_score || 0) >= 70 ? 'bg-emerald-100 text-emerald-700' - : (product.performance_score || 0) >= 40 + : (product.hidden_champion_score || 0) >= 40 ? 'bg-amber-100 text-amber-700' : 'bg-slate-100 text-slate-600' }`}> - {(product.performance_score || 0).toFixed(0)} + {(product.hidden_champion_score || 0).toFixed(0)} diff --git a/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx b/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx index 3caf033..4662280 100644 --- a/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx +++ b/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx @@ -90,21 +90,21 @@ export default function OverviewTab({ ? (sortedPrices[sortedPrices.length / 2 - 1] + sortedPrices[sortedPrices.length / 2]) / 2 : sortedPrices[Math.floor(sortedPrices.length / 2)] - const bucketCount = 10 - const range = max - min || 1 - const bucketSize = range / bucketCount + // Use predefined price ranges for meaningful distribution + const ranges = [ + [0, 50], [50, 100], [100, 200], [200, 500], + [500, 1000], [1000, 2000], [2000, 5000], [5000, 10000], [10000, Infinity] + ] - const buckets = Array.from({ length: bucketCount }, (_, i) => ({ - range: `₺${Math.round(min + i * bucketSize)}-${Math.round(min + (i + 1) * bucketSize)}`, - min: min + i * bucketSize, - max: min + (i + 1) * bucketSize, - count: 0 - })) - - prices.forEach(price => { - const idx = Math.min(Math.floor((price - min) / bucketSize), bucketCount - 1) - buckets[idx].count++ - }) + // Filter out empty ranges and build buckets + const buckets = ranges + .map(([lo, hi]) => ({ + range: hi === Infinity ? `₺${lo.toLocaleString('tr-TR')}+` : `₺${lo.toLocaleString('tr-TR')}-${hi.toLocaleString('tr-TR')}`, + min: lo, + max: hi, + count: prices.filter(p => p >= lo && (hi === Infinity ? 
true : p < hi)).length + })) + .filter(b => b.count > 0) return { buckets, mean: Math.round(mean), median: Math.round(median) } }, [allProducts]) @@ -186,7 +186,7 @@ export default function OverviewTab({ color="blue" /> b.min <= priceDistribution.mean && b.max > priceDistribution.mean)} + x={(priceDistribution.buckets.find(b => b.min <= priceDistribution.mean && (b.max === Infinity || b.max > priceDistribution.mean)) || {}).range} stroke="#f97316" strokeDasharray="5 5" - label={{ value: `Ort: ₺${priceDistribution.mean}`, fill: '#f97316', fontSize: 11, position: 'top' }} + label={{ value: `Ort: ₺${priceDistribution.mean.toLocaleString('tr-TR')}`, fill: '#f97316', fontSize: 11, position: 'top' }} /> diff --git a/backend/Dockerfile b/backend/Dockerfile index d189c96..a297164 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -30,7 +30,7 @@ COPY backend/ . COPY categories/ /data/initial-categories/ # Create data directories with proper permissions -RUN mkdir -p /data/categories /data/reports && \ +RUN mkdir -p /data/categories /data/reports /data/logs && \ chmod -R 755 /data # Make startup script executable (before switching to non-root user) diff --git a/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py b/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py new file mode 100644 index 0000000..7507d78 --- /dev/null +++ b/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py @@ -0,0 +1,30 @@ +"""add path_model to categories + +Revision ID: 38207dbbac44 +Revises: 001 +Create Date: 2026-03-28 14:56:06.784769 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '38207dbbac44' +down_revision: Union[str, None] = '001' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('categories', sa.Column('path_model', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('categories', 'path_model') + # ### end Alembic commands ### diff --git a/backend/analytics/champion_finder.py b/backend/analytics/champion_finder.py index fe88244..6a144f5 100644 --- a/backend/analytics/champion_finder.py +++ b/backend/analytics/champion_finder.py @@ -16,6 +16,51 @@ class HiddenChampionFinder: Gizli şampiyonları bulan sınıf Parçalı pazarlarda (düşük HHI) özelleştirilmiş filtreler kullanır """ + + @staticmethod + def _parse_social_proof_value(value_str: str) -> int: + """Parse '3k', '248k', '1.2k', '866' gibi değerleri sayıya çevir""" + if not value_str: + return 0 + value_str = str(value_str).strip().lower().replace(".", "") + if value_str.endswith("k"): + try: + return int(float(value_str[:-1]) * 1000) + except (ValueError, TypeError): + return 0 + if value_str.endswith("m"): + try: + return int(float(value_str[:-1]) * 1000000) + except (ValueError, TypeError): + return 0 + try: + return int(value_str) + except (ValueError, TypeError): + return 0 + + @staticmethod + def _extract_social_proofs(product: Dict) -> Dict[str, int]: + """Ürünün socialProofs array'inden veri çıkar""" + result = {"page_views": 0, "orders": 0, "baskets": 0, "favorites": 0} + social_proofs = product.get("socialProofs", []) + if not social_proofs: + return result + type_map = { + "pageViewCount": "page_views", + "orderCountL3D": "orders", + "orderCountL365D": "orders", + "basketCount": "baskets", + "favoriteCount": "favorites", + } + for sp in social_proofs: + sp_type = sp.get("type", "") + mapped = type_map.get(sp_type) + if mapped: + val = HiddenChampionFinder._parse_social_proof_value(sp.get("value", "0")) + # Daha büyük değeri al (orderCountL3D vs orderCountL365D) + if val > result[mapped]: + result[mapped] = val + return result def find( self, 
@@ -98,10 +143,12 @@ class HiddenChampionFinder: pid = str(product.get("id")) social = social_details.get(pid, {}) - page_views = social.get("page_views", 0) or 0 - orders = social.get("orders", 0) or 0 - baskets = social.get("baskets", 0) or 0 - favorites = social.get("favorites", 0) or 0 + # Önce enriched social data, sonra ürünün kendi socialProofs'u + embedded_social = self._extract_social_proofs(product) + page_views = social.get("page_views", 0) or embedded_social["page_views"] or 0 + orders = social.get("orders", 0) or embedded_social["orders"] or product.get("orders", 0) or 0 + baskets = social.get("baskets", 0) or embedded_social["baskets"] or 0 + favorites = social.get("favorites", 0) or embedded_social["favorites"] or 0 conversion_rate = (orders / page_views * 100) if page_views > 0 else 0 @@ -139,16 +186,29 @@ class HiddenChampionFinder: # Minimum Orders kontrolü (satış verisi çok önemli) min_orders = filters.get("min_orders", 1) # Varsayılan: en az 1 satış + # Sosyal veri var mı kontrol et + has_social = pid in social_details and page_views > 0 + # Özelleştirilmiş Filtreleme (daha esnek) - passes_filter = ( - rating >= filters.get("min_rating", 4.6) and - review_count < filters.get("max_review_count", 30) and - review_count >= 1 and # En az 1 yorum olmalı - orders >= min_orders and # EN AZ 1 SATIŞ OLMALI (satış verisi çok önemli) - (page_views >= threshold_views or page_views >= min_views_threshold) and # Kategori ortalamasının üzerinde VEYA minimum threshold - (baskets >= threshold_baskets or baskets >= min_baskets_threshold) and # Sepet de kategori ortalamasının üzerinde VEYA minimum - (conversion_rate >= 1.0 or page_views >= 500) # Minimum %1 conversion VEYA yüksek görüntülenme - ) + if has_social: + # Sosyal verisi olan ürünler: tam filtre + passes_filter = ( + rating >= filters.get("min_rating", 4.6) and + review_count < filters.get("max_review_count", 30) and + review_count >= 1 and + orders >= min_orders and + (page_views >= threshold_views or 
page_views >= min_views_threshold) and + (baskets >= threshold_baskets or baskets >= min_baskets_threshold) and + (conversion_rate >= 1.0 or page_views >= 500) + ) + else: + # Sosyal verisi olmayan ürünler: sadece rating + review + orders filtresi + passes_filter = ( + rating >= filters.get("min_rating", 4.6) and + review_count < filters.get("max_review_count", 30) and + review_count >= 1 and + orders >= min_orders + ) if passes_filter: # Potential score hesapla @@ -196,7 +256,7 @@ class HiddenChampionFinder: "category": category_name, "rating": round(rating, 2), "review_count": review_count, - "price": product.get("price", {}).get("sellingPrice", 0), + "price": (product.get("price", {}).get("sellingPrice", 0) or product.get("price", {}).get("discountedPrice", 0) or product.get("price", {}).get("current", 0)) if isinstance(product.get("price"), dict) else (product.get("price", 0) or 0), "page_views": page_views, "orders": orders, "baskets": baskets, diff --git a/backend/analytics/metrics.py b/backend/analytics/metrics.py index eac9275..923ead4 100644 --- a/backend/analytics/metrics.py +++ b/backend/analytics/metrics.py @@ -245,7 +245,13 @@ def get_rating_value(product: Dict) -> float: rating = product.get("rating", 0) if isinstance(rating, dict): return rating.get("averageRating", 0) or 0 - return float(rating) if rating else 0 + if rating: + return float(rating) + # Fallback: ratingScore nested object + rating_score = product.get("ratingScore", {}) + if isinstance(rating_score, dict): + return float(rating_score.get("averageRating", 0) or 0) + return 0 def get_review_count(product: Dict) -> int: @@ -263,6 +269,11 @@ def get_review_count(product: Dict) -> int: rating = product.get("rating", {}) if isinstance(rating, dict): review_count = rating.get("totalComments", 0) or rating.get("totalCount", 0) or 0 + if not review_count: + # Fallback: ratingScore nested object + rating_score = product.get("ratingScore", {}) + if isinstance(rating_score, dict): + review_count = 
rating_score.get("totalCount", 0) or 0 return int(review_count) if review_count else 0 diff --git a/backend/category_seeder.py b/backend/category_seeder.py new file mode 100644 index 0000000..b1ef7af --- /dev/null +++ b/backend/category_seeder.py @@ -0,0 +1,143 @@ +""" +Category Seeder - Trendyol categories JSON'dan DB'ye aktarma +Kaynak: /Users/furkanyigit/Desktop/trendyol_categories.json +3 seviye hiyerarşi: Segment (Kadın) → Grup (Giyim) → Yaprak (Elbise) +""" +import json +import re +import os +from database import SessionLocal, Category, Snapshot, Report, EnrichmentError +from logging_config import get_logger + +log = get_logger("seeder") + +DEFAULT_JSON_PATH = os.path.expanduser("~/Desktop/trendyol_categories.json") + + +def parse_url(url: str) -> dict: + """URL'den path_model ve trendyol_category_id çıkar. + + Örnekler: + /elbise-x-c56 → path_model="elbise-x-c56", category_id=56 + /kanvas-canta-y-s20972 → path_model="kanvas-canta-y-s20972", category_id=None + /kadin-giyim-x-g1-c82 → path_model="kadin-giyim-x-g1-c82", category_id=82 + """ + # Strip leading slash + path_model = url.lstrip("/") + + # Try to extract -c{id} from the end + m = re.search(r"-c(\d+)$", path_model) + category_id = int(m.group(1)) if m else None + + return { + "path_model": path_model, + "trendyol_category_id": category_id, + } + + +def seed_from_json(json_path: str = None, clear_existing: bool = True) -> dict: + """JSON dosyasını okuyup DB'ye yazar. 
+ + Returns: + {"segments": int, "groups": int, "leaves": int, "total": int} + """ + json_path = json_path or DEFAULT_JSON_PATH + + with open(json_path, "r", encoding="utf-8") as f: + data = json.load(f) + + db = SessionLocal() + try: + if clear_existing: + # FK constraint nedeniyle referans veren tabloları önce temizle + db.query(EnrichmentError).delete(synchronize_session=False) + db.query(Report).delete(synchronize_session=False) + db.query(Snapshot).delete(synchronize_session=False) + db.query(Category).filter(Category.parent_id != None).delete(synchronize_session=False) # noqa: E711 + db.query(Category).delete(synchronize_session=False) + db.commit() + log.info("Mevcut kategoriler ve bağlı veriler silindi") + + stats = {"segments": 0, "groups": 0, "leaves": 0, "total": 0} + + for segment_name, groups in data.items(): + # Seviye 1: Segment (Kadın, Erkek, ...) + segment = Category( + name=segment_name, + parent_id=None, + trendyol_category_id=None, + trendyol_url=None, + path_model=None, + is_active=True, + ) + db.add(segment) + db.flush() # ID'yi al + stats["segments"] += 1 + stats["total"] += 1 + + for group_item in groups: + group_name = group_item["name"] + group_url = group_item.get("url", "") + group_parsed = parse_url(group_url) if group_url else {"path_model": None, "trendyol_category_id": None} + + children = group_item.get("children", []) + + if children: + # Seviye 2: Grup (Giyim, Ayakkabı, ...) 
+ group = Category( + name=group_name, + parent_id=segment.id, + trendyol_category_id=group_parsed["trendyol_category_id"], + trendyol_url=f"https://www.trendyol.com{group_url}" if group_url else None, + path_model=group_parsed["path_model"], + is_active=True, + ) + db.add(group) + db.flush() + stats["groups"] += 1 + stats["total"] += 1 + + for leaf_item in children: + leaf_url = leaf_item.get("url", "") + leaf_parsed = parse_url(leaf_url) if leaf_url else {"path_model": None, "trendyol_category_id": None} + + leaf = Category( + name=leaf_item["name"], + parent_id=group.id, + trendyol_category_id=leaf_parsed["trendyol_category_id"], + trendyol_url=f"https://www.trendyol.com{leaf_url}" if leaf_url else None, + path_model=leaf_parsed["path_model"], + is_active=True, + ) + db.add(leaf) + stats["leaves"] += 1 + stats["total"] += 1 + else: + # Çocuğu yok — bu grup aslında yaprak + leaf = Category( + name=group_name, + parent_id=segment.id, + trendyol_category_id=group_parsed["trendyol_category_id"], + trendyol_url=f"https://www.trendyol.com{group_url}" if group_url else None, + path_model=group_parsed["path_model"], + is_active=True, + ) + db.add(leaf) + stats["leaves"] += 1 + stats["total"] += 1 + + db.commit() + log.info(f"Seed tamamlandı: {stats}") + return stats + + except Exception as e: + db.rollback() + log.error(f"Seed hatası: {e}") + raise + finally: + db.close() + + +if __name__ == "__main__": + result = seed_from_json() + print(f"Seed tamamlandı: {result}") diff --git a/backend/data_consolidator.py b/backend/data_consolidator.py new file mode 100644 index 0000000..daa259f --- /dev/null +++ b/backend/data_consolidator.py @@ -0,0 +1,791 @@ +""" +Data Consolidator — tek birleştirilmiş JSON oluşturma modülü. + +Scraping + enrichment bittiğinde tüm normalizasyon ve hesaplamayı yapar, +sonucu reports/report_{id}_data.json olarak kaydeder. +Dashboard endpoint sadece bu dosyayı okur. 
+""" +import json +import os +import re +import time +import random +from collections import defaultdict +from datetime import datetime + +import numpy as np + +from logging_config import get_logger + +log = get_logger("consolidator") + +# ───────────────────────────────────────────────────────── +# Ülke kodu → tam isim mapping (menşei analizi için) +# ───────────────────────────────────────────────────────── +COUNTRY_NAMES = { + "TR": "Türkiye", "CN": "Çin", "US": "Amerika", "GB": "İngiltere", + "FR": "Fransa", "DE": "Almanya", "IT": "İtalya", "ES": "İspanya", + "KR": "Güney Kore", "JP": "Japonya", "IN": "Hindistan", "TW": "Tayvan", + "HK": "Hong Kong", "TH": "Tayland", "VN": "Vietnam", "PL": "Polonya", + "CZ": "Çek Cumhuriyeti", "RO": "Romanya", "BG": "Bulgaristan", + "GR": "Yunanistan", "PT": "Portekiz", "NL": "Hollanda", "BE": "Belçika", + "CH": "İsviçre", "AT": "Avusturya", "SE": "İsveç", "NO": "Norveç", + "DK": "Danimarka", "FI": "Finlandiya", "RU": "Rusya", "UA": "Ukrayna", + "AE": "Birleşik Arap Emirlikleri", "SA": "Suudi Arabistan", "IL": "İsrail", + "EG": "Mısır", "ZA": "Güney Afrika", "BR": "Brezilya", "MX": "Meksika", + "CA": "Kanada", "AU": "Avustralya", "NZ": "Yeni Zelanda", "SG": "Singapur", + "MY": "Malezya", "ID": "Endonezya", "PH": "Filipinler", "PK": "Pakistan", + "BD": "Bangladeş", "AZ": "Azerbaycan", +} + +# Barkod prefix → ülke (EAN-13) +BARCODE_COUNTRIES = { + "TYB": "Trendyol (İç Barkod)", "SGT": "Trendyol Satıcı", + "KPE": "Trendyol Kampanya", "RTN": "Trendyol İade", "CDM": "Trendyol Özel", + "00-13": "ABD & Kanada", "190-199": "Rezerve/Özel Kullanım", + "20-29": "Mağaza İçi Kullanım", "30-37": "Fransa", + "380": "Bulgaristan", "383": "Slovenya", "370": "Litvanya", + "372": "Estonya", "373": "Moldova", "375": "Belarus", + "377": "Ermenistan", "379": "Kazakistan", "385": "Hırvatistan", + "387": "Bosna Hersek", "400-440": "Almanya", "45-49": "Japonya", + "50": "İngiltere", "520-521": "Yunanistan", "528": "Lübnan", + "529": "Kıbrıs", "530": 
"Arnavutluk", "531": "Makedonya", + "535": "Malta", "539": "İrlanda", "54": "Belçika & Lüksemburg", + "560": "Portekiz", "569": "İzlanda", "57": "Danimarka", + "590": "Polonya", "594": "Romanya", "599": "Macaristan", + "600-601": "Güney Afrika", "603": "Gana", "608": "Bahreyn", + "609": "Mauritius", "611": "Fas", "613": "Cezayir", + "615": "Nijerya", "616": "Kenya", "618": "Fildişi Sahili", + "619": "Tunus", "621": "Suriye", "622": "Mısır", + "624": "Libya", "625": "Ürdün", "626": "İran", + "627": "Kuveyt", "628": "Suudi Arabistan", "629": "BAE", + "630": "Katar", "631": "Umman", "64": "Finlandiya", + "690-699": "Çin", "70": "Norveç", "710-719": "Rezerve/Özel Kullanım", + "729": "İsrail", "73": "İsveç", "740": "Guatemala", + "741": "El Salvador", "742": "Honduras", "743": "Nikaragua", + "744": "Kosta Rika", "745": "Panama", "746": "Dominik Cumhuriyeti", + "750": "Meksika", "754-755": "Kanada", "759": "Venezuela", + "76": "İsviçre", "770-771": "Kolombiya", "773": "Uruguay", + "775": "Peru", "777": "Bolivya", "779": "Arjantin", + "780": "Şili", "784": "Paraguay", "786": "Ekvador", + "789-790": "Brezilya", "80-83": "İtalya", "84": "İspanya", + "850": "Küba", "858": "Slovakya", "859": "Çek Cumhuriyeti", + "860": "Sırbistan", "865": "Moğolistan", "867": "Kuzey Kore", + "868-869": "Türkiye", "87": "Hollanda", "880": "Güney Kore", + "884": "Kamboçya", "885": "Tayland", "888": "Singapur", + "890": "Hindistan", "893": "Vietnam", "896": "Pakistan", + "899": "Endonezya", "90-91": "Avusturya", "93": "Avustralya", + "94": "Yeni Zelanda", "955": "Malezya", "958": "Makao", + "977": "Süreli Yayınlar (ISSN)", "978-979": "Kitaplar (ISBN)", + "980": "Para İade Kuponları", "981-984": "Kuponlar", "99": "Kuponlar", +} + + +# ───────────────────────────────────────────────────────── +# Yardımcı fonksiyonlar +# ───────────────────────────────────────────────────────── + +def _extract_price(p): + """Extract selling price from product, handling both old and Search API formats.""" + pr = 
p.get("price", {}) + if isinstance(pr, (int, float)): + return pr + return (pr.get("sellingPrice") or pr.get("discountedPrice") + or pr.get("current") or pr.get("originalPrice") + or pr.get("old") or 0) + + +def _extract_rating(p): + """Extract average rating from product.""" + rating = p.get("ratingScore") or p.get("rating", 0) + if isinstance(rating, dict): + rating = rating.get("averageRating", 0) + try: + return float(rating) if rating else 0.0 + except (ValueError, TypeError): + return 0.0 + + +def _extract_review_count(p): + """Extract review/comment count from product.""" + review_count = 0 + try: + review_count = int(p.get("rating_count", 0) or 0) + except (ValueError, TypeError, AttributeError): + pass + if not review_count: + try: + rating_obj = p.get("ratingScore") or p.get("rating", {}) + if isinstance(rating_obj, dict): + review_count = int( + rating_obj.get("totalCount", 0) + or rating_obj.get("totalComments", 0) + or 0 + ) + except (ValueError, TypeError, AttributeError): + review_count = 0 + return review_count + + +def _parse_social_value(value_str): + """Parse social proof value like '642', '1.2k', '10B+' etc.""" + try: + s = str(value_str).strip() + if "k" in s.lower(): + return int(float(s.lower().replace("k", "").replace("+", "")) * 1000) + if "b+" in s.lower(): + return int(float(s.lower().replace("b+", "")) * 1_000_000_000) + if "m+" in s.lower(): + return int(float(s.lower().replace("m+", "")) * 1_000_000) + return int(s.replace("+", "")) + except (ValueError, TypeError): + return 0 + + +def _detect_barcode_country(prefix_num): + """Detect country from barcode prefix using BARCODE_COUNTRIES mapping.""" + for key, country in BARCODE_COUNTRIES.items(): + if "-" in key: + start, end = key.split("-") + try: + range_len = len(start) + prefix_to_check = prefix_num[:range_len] if len(prefix_num) >= range_len else prefix_num + prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1 + if int(start) <= prefix_int <= int(end): + return 
def normalize_product(raw_product, category_name, social_details):
    """
    Flatten a raw product dict into the flat record used by the dashboard.

    Social-proof priority: inline ``socialProofs`` entries on the product win;
    the enrichment map (``social_details``) only fills counters that are still
    missing or zero.

    Malformed sub-structures (non-dict proofs, listings or enrichment entries,
    a ``None`` origin value, a non-numeric price) are ignored instead of
    raising, so a single bad product cannot abort a whole report.

    Args:
        raw_product: Raw product dict as stored in the category file.
        category_name: Label written to both ``category`` and ``category_name``.
        social_details: Mapping of product id (``str`` or ``int`` key) to a
            dict with ``orders`` / ``page_views`` / ``baskets`` /
            ``favorites``; may be empty or ``None``.

    Returns:
        dict: Flat product record (id, name, brand, category, price, rating,
        review_count, social counters, barcode, origin, image_url, url,
        in_stock).
    """
    product_id = raw_product.get("contentId") or raw_product.get("id")

    try:
        price = _extract_price(raw_product)
    except (AttributeError, TypeError):
        # "price" was present but neither a number nor a dict (e.g. None).
        price = 0
    if not isinstance(price, (int, float)):
        # e.g. a price serialized as a string; unparsable means "no price".
        try:
            price = float(price)
        except (TypeError, ValueError):
            price = 0
    rating = _extract_rating(raw_product)
    review_count = _extract_review_count(raw_product)

    brand = raw_product.get("brand", {})
    brand_name = (brand.get("name") if isinstance(brand, dict) else brand) or "Bilinmeyen"

    # -- Social proof: inline socialProofs first, enrichment second --
    counter_fields = ("orders", "page_views", "baskets", "favorites")
    counters = dict.fromkeys(counter_fields, 0)
    proof_type_to_field = {
        "orderCountL3D": "orders",
        "pageViewCount": "page_views",
        "basketCount": "baskets",
        "favoriteCount": "favorites",
    }

    social_proofs = raw_product.get("socialProofs", [])
    if isinstance(social_proofs, list):
        for proof in social_proofs:
            if not isinstance(proof, dict):
                continue
            proof_type = proof.get("type", "")
            field = proof_type_to_field.get(proof_type) if isinstance(proof_type, str) else None
            if field:
                counters[field] = _parse_social_value(proof.get("value", "0"))

    # Enrichment fallback. Keys may be str (loaded from file) or int (in memory).
    if product_id and social_details:
        pid_text = str(product_id)
        enriched = social_details.get(pid_text)
        if not enriched and pid_text.isdigit():
            enriched = social_details.get(int(pid_text))
        if isinstance(enriched, dict):
            for field in counter_fields:
                if not counters[field]:
                    counters[field] = enriched.get(field, 0) or 0

    # -- Image URL --
    image_url = raw_product.get("imageUrl", "")
    if not image_url:
        images = raw_product.get("images", [])
        image_url = images[0] if isinstance(images, list) and images else ""

    # -- Product URL --
    product_url = raw_product.get("url", "")
    if not product_url and product_id:
        product_url = f"https://www.trendyol.com/p/{product_id}"

    # -- Barcode --
    barcode = ""
    winner_variant = raw_product.get("winnerVariant", {})
    if isinstance(winner_variant, dict):
        # Coerce to str so downstream prefix slicing never sees None or an int.
        barcode = str(winner_variant.get("barcode") or "")

    # -- Country of origin (first merchant listing only) --
    country_code = ""
    country_name = "Bilinmeyen"
    merchant_listings = raw_product.get("merchantListings", [])
    first_listing = (
        merchant_listings[0]
        if isinstance(merchant_listings, list) and merchant_listings
        else None
    )
    if isinstance(first_listing, dict):
        for custom_value in first_listing.get("customValues") or []:
            if isinstance(custom_value, dict) and custom_value.get("key") == "origin":
                country_code = str(custom_value.get("value") or "").strip().upper()
                country_name = COUNTRY_NAMES.get(
                    country_code, f"Diğer ({country_code})" if country_code else "Bilinmeyen"
                )
                break

    return {
        "id": product_id,
        "name": raw_product.get("name", ""),
        "brand": brand_name,
        "category": category_name,
        "category_name": category_name,  # Frontend compatibility (ProductFinderTab, OpportunityTab)
        "price": round(price, 2) if price else 0,
        "rating": round(rating, 2),
        "review_count": review_count,
        "orders": counters["orders"],
        "page_views": counters["page_views"],
        "baskets": counters["baskets"],
        "favorites": counters["favorites"],
        "barcode": barcode,
        "country_code": country_code,
        "country": country_name,
        "image_url": image_url or "https://via.placeholder.com/150",
        "url": product_url,
        "in_stock": raw_product.get("inStock", False),
    }
def calculate_kpis(products, low_rating_threshold=3.0):
    """Compute headline KPIs for a list of normalized products.

    Products with a price of 0 are excluded from price statistics, and
    products with a rating of 0 (unrated) are excluded from rating statistics.
    The placeholder brand ``"Bilinmeyen"`` is not counted as a brand.

    Args:
        products: Flat product dicts with ``price``, ``rating``, ``brand``
            and ``category`` keys.
        low_rating_threshold: Rated products strictly below this value count
            as low-rated. Defaults to 3.0.

    Returns:
        dict: Totals plus price and rating aggregates, rounded to 2 decimals.
    """
    prices = [p["price"] for p in products if p["price"] > 0]
    ratings = [p["rating"] for p in products if p["rating"] > 0]

    if prices:
        avg_price = sum(prices) / len(prices)
        median_price = float(np.percentile(prices, 50))
        min_price, max_price = min(prices), max(prices)
    else:
        avg_price = median_price = min_price = max_price = 0

    low_rating_count = sum(1 for r in ratings if r < low_rating_threshold)
    if ratings:
        avg_rating = sum(ratings) / len(ratings)
        low_rating_rate = low_rating_count / len(ratings) * 100
    else:
        avg_rating = low_rating_rate = 0

    unique_brands = {p["brand"] for p in products if p["brand"] and p["brand"] != "Bilinmeyen"}
    unique_subcategories = {p["category"] for p in products if p["category"]}

    return {
        "total_products": len(products),
        "total_subcategories": len(unique_subcategories),
        "total_brands": len(unique_brands),
        "avg_price": round(avg_price, 2),
        "median_price": round(median_price, 2),
        "avg_rating": round(avg_rating, 2),
        "low_rating_count": low_rating_count,
        "low_rating_rate": round(low_rating_rate, 2),
        "min_price": round(min_price, 2),
        "max_price": round(max_price, 2),
    }
calculate_charts +# ───────────────────────────────────────────────────────── + +def calculate_charts(products): + """Grafik verisi hesaplaması (main.py 2264-3248 mantığı).""" + prices = [p["price"] for p in products if p["price"] > 0] + total_products = len(products) + + # ── Price distribution ── + price_ranges = {"0-100": 0, "100-250": 0, "250-500": 0, "500-1000": 0, "1000+": 0} + for price in prices: + if price < 100: + price_ranges["0-100"] += 1 + elif price < 250: + price_ranges["100-250"] += 1 + elif price < 500: + price_ranges["250-500"] += 1 + elif price < 1000: + price_ranges["500-1000"] += 1 + else: + price_ranges["1000+"] += 1 + + # ── Kategori ve marka grupları ── + categories_data = defaultdict(list) + brands_data = defaultdict(int) + for p in products: + categories_data[p["category"]].append(p) + brands_data[p["brand"]] += 1 + + # ── Top categories (satışa göre sıralı) ── + top_categories = [] + for cat_name, cat_products in categories_data.items(): + total_orders = sum(p["orders"] for p in cat_products) + top_categories.append({ + "name": cat_name, + "count": len(cat_products), + "total_orders": total_orders, + }) + top_categories = sorted(top_categories, key=lambda x: x["total_orders"], reverse=True)[:20] + + # ── Top brands ── + top_brands = sorted( + [{"name": brand, "count": count} for brand, count in brands_data.items()], + key=lambda x: x["count"], reverse=True, + )[:20] + + # ── Rating distribution ── + rating_distribution = {"0-1": 0, "1-2": 0, "2-3": 0, "3-4": 0, "4-5": 0} + for p in products: + r = p["rating"] + if r < 1: + rating_distribution["0-1"] += 1 + elif r < 2: + rating_distribution["1-2"] += 1 + elif r < 3: + rating_distribution["2-3"] += 1 + elif r < 4: + rating_distribution["3-4"] += 1 + else: + rating_distribution["4-5"] += 1 + + # ── Brand price boxplot (top 10) ── + brand_price_stats = [] + for brand_name in [b["name"] for b in top_brands[:10]]: + bp = [p["price"] for p in products if p["brand"] == brand_name and p["price"] > 
0] + if bp and len(bp) >= 4: + pcts = np.percentile(bp, [0, 25, 50, 75, 100]) + brand_price_stats.append({ + "brand": brand_name, + "min": round(float(pcts[0]), 2), + "q1": round(float(pcts[1]), 2), + "median": round(float(pcts[2]), 2), + "q3": round(float(pcts[3]), 2), + "max": round(float(pcts[4]), 2), + "count": len(bp), + }) + + # ── Scatter plot (price vs rating) — sample 500 ── + scatter_data = [] + sample_size = min(500, len(products)) + sampled = random.sample(products, sample_size) if products else [] + for p in sampled: + if p["price"] > 0 and p["rating"] > 0: + scatter_data.append({ + "price": p["price"], + "rating": p["rating"], + "brand": p["brand"], + "in_stock": p["in_stock"], + }) + + # ── Brand strength score ── + brand_strength_scores = [] + for brand_name in [b["name"] for b in top_brands[:10]]: + bp = [p for p in products if p["brand"] == brand_name] + brand_count = len(bp) + brand_share = (brand_count / total_products * 100) if total_products > 0 else 0 + brand_ratings = [p["rating"] for p in bp if p["rating"] > 0] + brand_avg_rating = sum(brand_ratings) / len(brand_ratings) if brand_ratings else 0 + brand_out_of_stock = sum(1 for p in bp if not p["in_stock"]) + stockout_rate = (brand_out_of_stock / brand_count * 100) if brand_count > 0 else 0 + strength = brand_share + (brand_avg_rating * 5) - stockout_rate + brand_strength_scores.append({ + "brand": brand_name, + "share": round(brand_share, 2), + "avg_rating": round(brand_avg_rating, 2), + "stockout_rate": round(stockout_rate, 2), + "strength_score": round(strength, 2), + }) + brand_strength_scores.sort(key=lambda x: x["strength_score"], reverse=True) + + # ── Heatmap: Brand × Category ── + top_10_brands = [b["name"] for b in top_brands[:10]] + top_10_cats = [c["name"] for c in top_categories[:10]] + heatmap_data = [] + for cat_name in top_10_cats: + cat_products = categories_data.get(cat_name, []) + for brand_name in top_10_brands: + count = sum(1 for p in cat_products if p["brand"] == 
brand_name) + if count > 0: + heatmap_data.append({"brand": brand_name, "category": cat_name, "value": count}) + + # ── Category price premium ── + avg_price = sum(prices) / len(prices) if prices else 0 + category_price_analysis = [] + for cat_name, cat_products in categories_data.items(): + cp = [p["price"] for p in cat_products if p["price"] > 0] + if cp: + cat_avg = sum(cp) / len(cp) + cat_median = float(np.percentile(cp, 50)) + premium = ((cat_avg - avg_price) / avg_price * 100) if avg_price > 0 else 0 + category_price_analysis.append({ + "category": cat_name, + "avg_price": round(cat_avg, 2), + "median_price": round(cat_median, 2), + "price_premium": round(premium, 2), + "product_count": len(cp), + "min_price": round(min(cp), 2), + "max_price": round(max(cp), 2), + }) + category_price_analysis.sort(key=lambda x: x["price_premium"], reverse=True) + most_expensive = [c for c in category_price_analysis if c["price_premium"] > 0][:10] + most_affordable = [c for c in category_price_analysis if c["price_premium"] < 0][-10:] + most_affordable.reverse() + + # ── Origin analysis ── + origin_counts = defaultdict(int) + products_with_origin = 0 + for p in products: + if p["country_code"]: + origin_counts[p["country_code"]] += 1 + products_with_origin += 1 + + origin_country_data = sorted( + [ + { + "country_code": code, + "country_name": COUNTRY_NAMES.get(code, f"Diğer ({code})"), + "product_count": count, + "percentage": round(count / products_with_origin * 100, 2) if products_with_origin else 0, + } + for code, count in origin_counts.items() + ], + key=lambda x: x["product_count"], reverse=True, + ) + + # ── Barcode analysis ── + barcode_prefixes = defaultdict(int) + barcode_countries_detected = defaultdict(int) + products_with_barcode = 0 + for p in products: + bc = p.get("barcode", "") + if bc and len(bc) >= 3: + products_with_barcode += 1 + prefix = bc[:3] + barcode_prefixes[prefix] += 1 + detected = _detect_barcode_country(prefix) + 
barcode_countries_detected[detected] += 1 + + barcode_prefix_data = sorted( + [ + { + "prefix": prefix, + "detected_country": _detect_barcode_country(prefix), + "product_count": count, + "percentage": round(count / products_with_barcode * 100, 2) if products_with_barcode else 0, + } + for prefix, count in barcode_prefixes.items() + ], + key=lambda x: x["product_count"], reverse=True, + )[:20] + + barcode_country_data = sorted( + [ + { + "country_name": country, + "product_count": count, + "percentage": round(count / products_with_barcode * 100, 2) if products_with_barcode else 0, + } + for country, count in barcode_countries_detected.items() + ], + key=lambda x: x["product_count"], reverse=True, + ) + + # ── Merchant analysis ── + merchants_data = {} + total_winners = 0 + products_with_merchant = 0 + # We need raw product data for merchant analysis — use the flat products + # Merchant info is already lost in normalization, so we skip this in consolidator + # The original code extracted from raw_product.merchantListings + # For consolidated data, we'll build merchants from the products we have + + # ── Build result ── + return { + "price_distribution": price_ranges, + "top_categories": top_categories, + "top_brands": top_brands, + "rating_distribution": rating_distribution, + "brand_price_boxplot": brand_price_stats, + "price_rating_scatter": scatter_data, + "brand_strength": brand_strength_scores, + "brand_category_heatmap": heatmap_data, + "category_price_premium": { + "all_categories": category_price_analysis, + "most_expensive": most_expensive, + "most_affordable": most_affordable, + }, + "origin_analysis": { + "countries": origin_country_data, + "top_countries": origin_country_data[:10], + "total_products_with_origin": products_with_origin, + "coverage_percentage": round(products_with_origin / total_products * 100, 2) if total_products else 0, + }, + "barcode_analysis": { + "prefixes": barcode_prefix_data, + "countries_from_barcode": barcode_country_data, + 
def _calculate_merchant_analysis(raw_products, categories_data):
    """
    Compute the seller analysis from raw product data.

    Only the first entry of each product's ``merchantListings`` is considered.
    Products without a usable listing or merchant id are skipped.

    Args:
        raw_products: List of raw product dicts (``merchantListings`` required).
        categories_data: Unused; kept so existing callers keep working.

    Returns:
        dict: ``merchants`` (sorted by product count, descending),
        ``top_merchants`` (first 20), totals, winner share, and coverage
        relative to ``len(raw_products)``.
    """
    merchants_data = {}
    total_winners = 0
    products_with_merchant = 0

    for product in raw_products:
        if not isinstance(product, dict):
            continue
        merchant_listings = product.get("merchantListings") or []
        listing = (
            merchant_listings[0]
            if isinstance(merchant_listings, list) and merchant_listings
            else None
        )
        if not isinstance(listing, dict):
            continue
        merchant = listing.get("merchant") or {}
        merchant_id = merchant.get("id") if isinstance(merchant, dict) else None
        if not merchant_id:
            continue

        products_with_merchant += 1
        stats = merchants_data.get(merchant_id)
        if stats is None:
            merchant_name = merchant.get("name") or merchant.get("officialName") or f"Satıcı {merchant_id}"
            stats = merchants_data[merchant_id] = {
                "merchant_id": merchant_id,
                "merchant_name": merchant_name,
                "product_count": 0,
                "priced_count": 0,
                "total_price": 0,
                "winner_count": 0,
            }

        stats["product_count"] += 1
        price = _extract_price(product)
        if isinstance(price, (int, float)) and price > 0:
            stats["total_price"] += price
            stats["priced_count"] += 1
        if listing.get("isWinner"):
            stats["winner_count"] += 1
            total_winners += 1

    merchant_list = []
    for mid, data in merchants_data.items():
        # Average over priced products only: unpriced products add nothing to
        # total_price, so dividing by product_count would understate the mean.
        avg_price = data["total_price"] / data["priced_count"] if data["priced_count"] > 0 else 0
        winner_ratio = (data["winner_count"] / data["product_count"] * 100) if data["product_count"] > 0 else 0
        merchant_url = None
        # Placeholder names ("Satıcı <id>") have no storefront slug to link to.
        if data["merchant_name"] and not data["merchant_name"].startswith("Satıcı "):
            merchant_url = f"https://www.trendyol.com/magaza/{data['merchant_name'].lower().replace(' ', '-')}-m-{mid}"
        merchant_list.append({
            "merchant_id": mid,
            "merchant_name": data["merchant_name"],
            "merchant_url": merchant_url,
            "product_count": data["product_count"],
            "avg_price": round(avg_price, 2),
            "winner_count": data["winner_count"],
            "winner_ratio": round(winner_ratio, 2),
        })

    merchant_list.sort(key=lambda x: x["product_count"], reverse=True)
    total_products = len(raw_products)
    total_merchants = len(merchants_data)
    winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0

    return {
        "merchants": merchant_list,
        "top_merchants": merchant_list[:20],
        "total_merchants": total_merchants,
        "total_products_with_merchant": products_with_merchant,
        "total_winners": total_winners,
        "winner_percentage": round(winner_percentage, 2),
        "coverage_percentage": round(products_with_merchant / total_products * 100, 2) if total_products else 0,
    }
def build_consolidated_report(report_id, db, reports_dir, social_data=None):
    """
    Load a report's data, normalize it, compute all aggregates, and return them.

    Category files that are missing, unreadable or structurally malformed are
    skipped with a warning so one bad file cannot abort the consolidation.

    Args:
        report_id: Database id of the report.
        db: SQLAlchemy session used to look up the ``Report`` row.
        reports_dir: Path of the ``reports/`` directory.
        social_data: Optional enrichment payload (same shape as
            ``social.json``). When omitted,
            ``<reports_dir>/enrich_<report_id>/social.json`` is read if it
            exists.

    Returns:
        dict | None: Consolidated dashboard dict, or ``None`` when the report
        row or its JSON file is missing, or when no product survives
        normalization.
    """
    from database import Report
    t0 = time.time()

    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        return None
    if not report.json_file_path or not os.path.exists(report.json_file_path):
        return None

    # Read the report metadata.
    with open(report.json_file_path, "r", encoding="utf-8") as f:
        report_data = json.load(f)

    # Load social-proof data (explicit argument wins over the file on disk).
    social_details = {}
    if social_data:
        social_details = social_data.get("details") or {}
    else:
        social_file = os.path.join(reports_dir, f"enrich_{report_id}", "social.json")
        if os.path.exists(social_file):
            try:
                with open(social_file, "r", encoding="utf-8") as f:
                    soc = json.load(f)
                social_details = soc.get("details") or {}
            except (OSError, ValueError, AttributeError) as e:
                # Best effort: the report can still be built without enrichment.
                log.warning(f"Social proof dosyası okunamadı: {e}")

    # -- Load raw products and normalize them --
    normalized_products = []
    raw_products_all = []  # Raw records are kept for the merchant analysis.

    for detail in report_data.get("details") or []:
        if not isinstance(detail, dict):
            continue
        if not detail.get("success") or not detail.get("file_path"):
            continue
        file_path = detail["file_path"]
        if not os.path.exists(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                cat_data = json.load(f)
            raw_products = [r for r in (cat_data.get("products") or []) if isinstance(r, dict)]
            cat_name_raw = detail.get("category_name", "") or ""
            # Drop a trailing numeric suffix from the category label.
            cat_name = re.sub(r'\s+\d+$', '', cat_name_raw)

            for raw in raw_products:
                # Stamp the category on the raw product (load_report_products compatibility).
                if isinstance(raw.get("category"), dict):
                    raw["category"]["name"] = cat_name
                else:
                    raw["category"] = {"id": 0, "name": cat_name}

                norm = normalize_product(raw, cat_name, social_details)
                if norm["price"] and norm["category"]:
                    normalized_products.append(norm)

            raw_products_all.extend(raw_products)
        except (OSError, ValueError, KeyError, AttributeError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # AttributeError/TypeError cover files whose top level is not a dict.
            log.warning(f"Kategori dosyası okunamadı: {file_path}: {e}")
            continue

    if not normalized_products:
        log.warning(f"Rapor {report_id} için ürün bulunamadı")
        return None

    # -- Aggregations --
    kpis = calculate_kpis(normalized_products)
    charts = calculate_charts(normalized_products)
    insights = calculate_insights(normalized_products)

    # Merchant analysis needs the raw records (merchantListings is dropped by normalization).
    charts["merchant_analysis"] = _calculate_merchant_analysis(raw_products_all, {})

    elapsed = time.time() - t0
    log.info(f"Rapor {report_id} konsolide edildi: {len(normalized_products)} ürün, {elapsed:.2f}s")

    return {
        "metadata": {
            "report_id": report_id,
            "report_name": report.name,
            "created_at": report.created_at.isoformat() if report.created_at else None,
            "total_products": len(normalized_products),
            "total_categories": kpis["total_subcategories"],
            "consolidated_at": datetime.now().isoformat(),
        },
        "report_id": report_id,
        "report_name": report.name,
        "products": normalized_products,
        "all_products": normalized_products,  # Backward compatibility (frontend expects "all_products")
        "kpis": kpis,
        "charts": charts,
        "insights": insights,
    }
def save_consolidated_report(report_id, data, reports_dir):
    """Write the consolidated data to ``<reports_dir>/report_<id>_data.json``.

    The JSON is written to a temporary sibling file and then moved into place
    with ``os.replace``, so a concurrent reader never sees a half-written
    file.

    Args:
        report_id: Report id used in the file name.
        data: JSON-serializable consolidated report dict.
        reports_dir: Target directory; created if missing. An empty string
            means the current working directory.

    Returns:
        str: Path of the written file.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``data`` is not JSON-serializable.
    """
    path = os.path.join(reports_dir, f"report_{report_id}_data.json")
    target_dir = os.path.dirname(path)
    if target_dir:  # os.makedirs("") raises, so skip it for a bare file name
        os.makedirs(target_dir, exist_ok=True)

    # Unique temp name so two concurrent writers cannot clobber each other.
    tmp_path = f"{path}.{os.getpid()}-{time.time_ns()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a partial temp file behind; re-raise the original error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    log.info(f"Konsolide rapor kaydedildi: {path}")
    return path
"elbise-x-c56") is_active = Column(Boolean, default=True) created_at = Column(DateTime, default=datetime.utcnow) @@ -86,7 +90,7 @@ class EnrichmentError(Base): def init_db(): """Initialize database - create tables""" Base.metadata.create_all(bind=engine) - print("✅ Database initialized successfully!") + log.info("Database initialized successfully") def get_db(): diff --git a/backend/google_trends_helper.py b/backend/google_trends_helper.py index 871f769..c65e8df 100644 --- a/backend/google_trends_helper.py +++ b/backend/google_trends_helper.py @@ -8,6 +8,9 @@ from pytrends.request import TrendReq from typing import Dict, Optional from datetime import datetime, timedelta import time +from logging_config import get_logger + +log = get_logger("trends") class GoogleTrendsCache: @@ -135,12 +138,12 @@ def fetch_google_trends(product_name: str, retries: int = 3) -> Dict: except Exception as e: error_msg = str(e) - print(f"Google Trends API Error (attempt {attempt + 1}/{retries}): {error_msg}") + log.warning(f"Google Trends API Error (attempt {attempt + 1}/{retries}): {error_msg}") # Rate limit error - wait longer if '429' in error_msg or 'rate' in error_msg.lower(): wait_time = 5 * (attempt + 1) # 5, 10, 15 seconds - print(f"Rate limited. Waiting {wait_time} seconds...") + log.warning(f"Rate limited. 
class JSONFormatter(logging.Formatter):
    """Structured JSON log formatter for file output.

    Each record becomes one JSON object with ``ts``, ``level``, ``logger``,
    ``msg``, ``correlation_id`` and ``report_id``, plus any of the optional
    fields below that were passed through ``extra=`` and the formatted
    traceback when exception info is attached.
    """

    # Optional structured fields copied onto the entry when set on the record.
    _EXTRA_FIELDS = (
        "url", "status_code", "response_time_ms", "response_size",
        "error_type", "duration_ms", "cb_state", "failures",
        "batch_size", "product_count", "cache_size",
    )

    def format(self, record: logging.LogRecord) -> str:
        """Serialize ``record`` to a single-line JSON string."""
        log_entry = {
            # Use the record's creation time, not "now": the same record is
            # formatted once per handler and must carry one timestamp.
            "ts": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
            "correlation_id": get_correlation_id(),
            "report_id": get_report_id(),
        }

        # Add extra fields if present.
        for key in self._EXTRA_FIELDS:
            val = getattr(record, key, None)
            if val is not None:
                log_entry[key] = val

        # Add exception info.
        if record.exc_info and record.exc_info[0] is not None:
            log_entry["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_entry, ensure_ascii=False, default=str)
+ + Creates: + - logs/trendyol.log (all levels, JSON, 10MB x 5 rotation) + - logs/errors.log (WARNING+, JSON, 10MB x 3 rotation) + - console output (INFO+, colored) + """ + if log_dir is None: + log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "logs") + + os.makedirs(log_dir, exist_ok=True) + + root = logging.getLogger("trendyol") + root.setLevel(logging.DEBUG) + + # Prevent duplicate handlers on reload + if root.handlers: + return + + json_fmt = JSONFormatter() + console_fmt = ConsoleFormatter() + + # 1. Main log file — all levels, JSON + main_handler = logging.handlers.RotatingFileHandler( + os.path.join(log_dir, "trendyol.log"), + maxBytes=10 * 1024 * 1024, # 10 MB + backupCount=5, + encoding="utf-8", + ) + main_handler.setLevel(logging.DEBUG) + main_handler.setFormatter(json_fmt) + root.addHandler(main_handler) + + # 2. Error log file — WARNING+, JSON + error_handler = logging.handlers.RotatingFileHandler( + os.path.join(log_dir, "errors.log"), + maxBytes=10 * 1024 * 1024, + backupCount=3, + encoding="utf-8", + ) + error_handler.setLevel(logging.WARNING) + error_handler.setFormatter(json_fmt) + root.addHandler(error_handler) + + # 3. 
@contextmanager
def log_timing(logger: logging.Logger, operation: str, level=logging.INFO, **extra):
    """Context manager that logs how long the wrapped operation took.

    One record is emitted at ``level`` when the block exits. On success the
    message is ``"<operation> completed in <ms>ms"``. If the block raises,
    the message is ``"<operation> failed after <ms>ms"`` and the exception
    propagates unchanged.

    Args:
        logger: Logger that receives the timing record.
        operation: Human-readable operation name used in the message.
        level: Logging level for the record.
        **extra: Additional fields attached to the record via ``extra=``
            alongside ``duration_ms``.
    """
    start = time.monotonic()
    outcome = "completed in"
    try:
        yield
    except BaseException:
        # Do not report a failed operation as "completed".
        outcome = "failed after"
        raise
    finally:
        elapsed_ms = round((time.monotonic() - start) * 1000, 1)
        logger.log(
            level,
            f"{operation} {outcome} {elapsed_ms}ms",
            extra={"duration_ms": elapsed_ms, **extra},
        )
+log_keywords = get_logger("keywords") -# Initialize database on startup init_db() # GS1 Barcode Prefix to Country Mapping (EAN-13 / EAN-8) @@ -401,6 +413,17 @@ class BoundedCache: del self.cache[oldest] del self.timestamps[oldest] + def __setitem__(self, key, value): + """Support cache[key] = value syntax""" + self.set(key, value) + + def __getitem__(self, key): + """Support cache[key] syntax""" + result = self.get(key) + if result is None: + raise KeyError(key) + return result + def __contains__(self, key): """Support 'in' operator for cache key checking""" with self.lock: @@ -425,6 +448,13 @@ enrichment_progress = BoundedCache(maxsize=50, ttl=7200) # similar_cache = {} # followers_cache = {} +def _extract_price(p): + """Extract selling price from product, handling both old and Search API formats""" + pr = p.get("price", {}) + if isinstance(pr, (int, float)): + return pr + return pr.get("sellingPrice") or pr.get("discountedPrice") or pr.get("current") or pr.get("originalPrice") or pr.get("old") or 0 + def _chunked(seq, size): for i in range(0, len(seq), size): yield seq[i:i+size] @@ -512,6 +542,7 @@ _retry_strategy = Retry( _http_adapter = HTTPAdapter(max_retries=_retry_strategy, pool_connections=100, pool_maxsize=200) _session = requests.Session() _session.headers.update(TRENDYOL_HEADERS) +_session.cookies.update({"storefrontId": "1", "language": "tr", "countryCode": "TR"}) _session.mount("https://", _http_adapter) _DEFAULT_TIMEOUT = 30 # Longer timeout to avoid premature failures @@ -530,11 +561,12 @@ class _RateLimiter: return sleep_for = self._next_time - now self._next_time += self.min_interval - # small jitter to avoid bursts - time.sleep(max(0, sleep_for) + random.uniform(0.0, 0.05)) + actual_sleep = max(0, sleep_for) + random.uniform(0.0, 0.05) + log_http.debug(f"Rate limiter sleeping {actual_sleep:.3f}s") + time.sleep(actual_sleep) -_trendyol_limiter = _RateLimiter(rate_per_sec=5.0) # 0.2 seconds between requests (Optimized for localhost - 10x faster!) 
def _http_get(url: str, params: dict) -> requests.Response:
    """Perform a rate-limited GET through the shared session and log the outcome.

    Waits on the module-level rate limiter, issues the request with the
    default timeout, and emits one log record describing the result:
    a DEBUG record with status, latency and body size on success, a WARNING
    for timeouts and connection errors, and an ERROR for any other
    ``requests`` failure. Failures are re-raised unchanged after logging.

    Args:
        url: Absolute URL to request.
        params: Query-string parameters passed to the session.

    Returns:
        The ``requests.Response`` returned by the session.

    Raises:
        requests.exceptions.RequestException: Any request failure, re-raised
            after it has been logged.
    """
    _trendyol_limiter.wait()
    began = time.monotonic()

    def _elapsed_ms():
        # Milliseconds since the request was issued, rounded to one decimal.
        return round((time.monotonic() - began) * 1000, 1)

    try:
        response = _session.get(url, params=params, timeout=_DEFAULT_TIMEOUT)
        took_ms = _elapsed_ms()
        success_fields = {
            "url": url,
            "status_code": response.status_code,
            "response_time_ms": took_ms,
            "response_size": len(response.content),
        }
        log_http.debug(f"{response.status_code} {url}", extra=success_fields)
        return response
    except requests.exceptions.RequestException as e:
        took_ms = _elapsed_ms()
        # Timeout is tested first so errors that are both a timeout and a
        # connection error are reported as timeouts.
        if isinstance(e, requests.exceptions.Timeout):
            log_http.warning(
                f"TIMEOUT {url} after {took_ms}ms",
                extra={"url": url, "error_type": "timeout", "response_time_ms": took_ms},
            )
        elif isinstance(e, requests.exceptions.ConnectionError):
            log_http.warning(
                f"CONNECTION_ERROR {url}: {e}",
                extra={"url": url, "error_type": "connection", "response_time_ms": took_ms},
            )
        else:
            log_http.error(
                f"REQUEST_ERROR {url}: {e}",
                extra={"url": url, "error_type": "request", "response_time_ms": took_ms},
            )
        raise
TypeError, AttributeError): return 0 @@ -794,7 +879,7 @@ def fetch_merchant_questions(product_id: int, page: int = 0, page_size: int = 4) if resp.status_code == 200: return resp.json() except Exception as e: - print(f"Merchant Questions API error for product {product_id}: {e}") + log_social.warning(f"Merchant Questions API error for product {product_id}: {e}") return None @@ -817,7 +902,7 @@ def fetch_similar_products(product_id: int, page: int = 0, page_size: int = 8): if resp.status_code == 200: return resp.json() except Exception as e: - print(f"Similar Products API error for product {product_id}: {e}") + log_social.warning(f"Similar Products API error for product {product_id}: {e}") return None @@ -835,7 +920,7 @@ def fetch_merchant_followers(merchant_id: int): if resp.status_code == 200: return resp.json() except Exception as e: - print(f"Merchant Followers API error for merchant {merchant_id}: {e}") + log_social.warning(f"Merchant Followers API error for merchant {merchant_id}: {e}") return None @@ -845,6 +930,7 @@ class CategoryBase(BaseModel): parent_id: Optional[int] = None trendyol_category_id: Optional[int] = None trendyol_url: Optional[str] = None + path_model: Optional[str] = None is_active: bool = True @@ -857,6 +943,7 @@ class CategoryUpdate(BaseModel): parent_id: Optional[int] = None trendyol_category_id: Optional[int] = None trendyol_url: Optional[str] = None + path_model: Optional[str] = None is_active: Optional[bool] = None @@ -958,6 +1045,7 @@ def get_main_categories(db: Session = Depends(get_db)): "parent_id": cat.parent_id, "trendyol_category_id": cat.trendyol_category_id, "trendyol_url": cat.trendyol_url, + "path_model": cat.path_model, "is_active": cat.is_active, "created_at": cat.created_at, "children_count": children_count @@ -984,6 +1072,7 @@ def get_category(category_id: int, db: Session = Depends(get_db)): "parent_id": category.parent_id, "trendyol_category_id": category.trendyol_category_id, "trendyol_url": category.trendyol_url, + 
"path_model": category.path_model, "is_active": category.is_active, "created_at": category.created_at, "children_count": children_count @@ -1023,6 +1112,7 @@ def get_category_children(category_id: int, db: Session = Depends(get_db)): "parent_id": cat.parent_id, "trendyol_category_id": cat.trendyol_category_id, "trendyol_url": cat.trendyol_url, + "path_model": cat.path_model, "is_active": cat.is_active, "created_at": cat.created_at, "children_count": children_count @@ -1048,6 +1138,7 @@ def create_category(category: CategoryCreate, db: Session = Depends(get_db)): parent_id=category.parent_id, trendyol_category_id=category.trendyol_category_id, trendyol_url=category.trendyol_url, + path_model=category.path_model, is_active=category.is_active ) @@ -1061,6 +1152,7 @@ def create_category(category: CategoryCreate, db: Session = Depends(get_db)): "parent_id": db_category.parent_id, "trendyol_category_id": db_category.trendyol_category_id, "trendyol_url": db_category.trendyol_url, + "path_model": db_category.path_model, "is_active": db_category.is_active, "created_at": db_category.created_at, "children_count": 0 @@ -1090,6 +1182,8 @@ def update_category(category_id: int, category: CategoryUpdate, db: Session = De db_category.trendyol_category_id = category.trendyol_category_id if category.trendyol_url is not None: db_category.trendyol_url = category.trendyol_url + if category.path_model is not None: + db_category.path_model = category.path_model if category.is_active is not None: db_category.is_active = category.is_active @@ -1105,6 +1199,7 @@ def update_category(category_id: int, category: CategoryUpdate, db: Session = De "parent_id": db_category.parent_id, "trendyol_category_id": db_category.trendyol_category_id, "trendyol_url": db_category.trendyol_url, + "path_model": db_category.path_model, "is_active": db_category.is_active, "created_at": db_category.created_at, "children_count": children_count @@ -1141,6 +1236,7 @@ class BulkCategoryItem(BaseModel): parent_name: 
@app.post("/categories/seed-from-json")
def seed_from_json_endpoint(clear_existing: bool = True):
    """Seed categories from the trendyol_categories.json file.

    Delegates to ``category_seeder.seed_from_json`` and merges the mapping it
    returns into the response next to a ``"message"`` field.

    Args:
        clear_existing: Forwarded unchanged to ``seed_from_json``.
            NOTE(review): presumably wipes existing categories first, and it
            defaults to True — confirm against category_seeder.

    Returns:
        A dict with ``"message"`` plus the seeder's stats.

    Raises:
        HTTPException: 404 when the seeder raises ``FileNotFoundError``,
            500 for any other unexpected failure. An ``HTTPException`` raised
            by the seeder itself is passed through untouched.
    """
    # Imported lazily, as in the original, so the seeder is only loaded on use.
    from category_seeder import seed_from_json

    try:
        stats = seed_from_json(clear_existing=clear_existing)
        return {"message": "Seed tamamlandı", **stats}
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail="trendyol_categories.json not found") from exc
    except HTTPException:
        # Keep a deliberate HTTP error's status code instead of masking it as 500.
        raise
    except Exception as exc:
        # Log with traceback; the client only receives the stringified error.
        log_api.error(f"Category seed failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
""" result = [] seen = set() @@ -1282,9 +1392,12 @@ def collect_scrapable_categories(db: Session, category_ids: list) -> list: # Has children — recurse deeper child_ids = [c.id for c in children] _collect(child_ids) + elif cat.path_model: + # Leaf category with path_model — scrape via Search API + result.append((cat.path_model, cat.name, cat.trendyol_category_id)) elif cat.trendyol_category_id: - # Leaf category with trendyol_category_id — add to results - result.append((cat.trendyol_category_id, cat.name)) + # Fallback: no path_model but has category_id (legacy) + result.append((None, cat.name, cat.trendyol_category_id)) _collect(category_ids) return result @@ -1314,28 +1427,28 @@ def scrape_category_data(category_id: int, db: Session = Depends(get_db)): categories_to_scrape = collect_scrapable_categories(db, sub_ids) if not categories_to_scrape: - raise HTTPException(status_code=400, detail="No valid Trendyol IDs found in this category or its subcategories") + raise HTTPException(status_code=400, detail="No scrapable categories found (missing path_model/trendyol_category_id)") - # Start scraping - results = scrape_multiple_categories(categories_to_scrape, delay=2.0) + # Start scraping — convert to legacy format for scrape_multiple_categories + # Only categories with trendyol_category_id can use the old API + legacy_cats = [(cat_id, name) for (pm, name, cat_id) in categories_to_scrape if cat_id] + results = scrape_multiple_categories(legacy_cats, delay=2.0) if legacy_cats else { + "successful": 0, "failed": 0, "total_products": 0, "details": [] + } # Create snapshots for successful scrapes for detail in results["details"]: if detail["success"]: - pass - # Find the category in DB sub_cat = db.query(Category).filter( Category.trendyol_category_id == detail["category_id"] ).first() if sub_cat: - pass - # Create snapshot snapshot = Snapshot( category_id=sub_cat.id, snapshot_month=datetime.now().strftime("%Y-%m"), total_products=detail["total_products"], - avg_price=0, 
# Calculate from products if needed + avg_price=0, json_file_path=detail["file_path"], scraped_at=datetime.now() ) @@ -1416,19 +1529,16 @@ async def create_report( SYNCHRONOUS: Report only saved when 100% complete Accepts GET request for EventSource compatibility """ - # print(f"🔍 DEBUG - Received request:") - print(f" - name: {name}") - print(f" - category_id: {category_id}") - print(f" - subcategory_ids (raw): {subcategory_ids}") + log_api.info(f"Report create request: name={name}, category_id={category_id}, subcategory_ids={subcategory_ids}") # Parse subcategory_ids if provided parsed_subcategory_ids = None if subcategory_ids: try: parsed_subcategory_ids = json_module.loads(subcategory_ids) - print(f" - subcategory_ids (parsed): {parsed_subcategory_ids}") + log_api.debug(f"Parsed subcategory_ids: {parsed_subcategory_ids}") except Exception as e: - print(f" - ❌ Error parsing subcategory_ids: {e}") + log_api.warning(f"Error parsing subcategory_ids: {e}") parsed_subcategory_ids = None # Get main category @@ -1464,7 +1574,7 @@ async def create_report( categories_to_scrape = collect_scrapable_categories(db, sub_ids) if not categories_to_scrape: - raise HTTPException(status_code=400, detail="No valid Trendyol IDs found in this category or its subcategories") + raise HTTPException(status_code=400, detail="No scrapable categories found (missing path_model/trendyol_category_id)") # Generate unique task ID task_id = str(uuid.uuid4()) @@ -1488,8 +1598,10 @@ async def create_report( # Stream progress with SSE async def progress_stream(): """Generator that yields real-time progress events""" + set_correlation_id(task_id) + set_report_id(category_id) + log_sse.info(f"SSE stream started: task={task_id}, category={main_category.name}") try: - pass # Send initial info yield f"data: {json_module.dumps({'type': 'info', 'message': f'📂 {main_category.name} kategorisi seçildi', 'progress': 0})}\n\n" await asyncio.sleep(0.1) @@ -1501,7 +1613,7 @@ async def create_report( await 
asyncio.sleep(0.5) # Start synchronous scraping with progress updates - from scraper import TrendyolScraper + from scraper import TrendyolSearchScraper, TrendyolScraper import json import os from datetime import datetime @@ -1515,29 +1627,64 @@ async def create_report( } # Scrape each category with real-time updates - for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1): + for idx, (path_model, cat_name, cat_id) in enumerate(categories_to_scrape, 1): progress = int((idx / len(categories_to_scrape)) * 80) + 10 yield f"data: {json_module.dumps({'type': 'processing', 'message': f'🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...', 'progress': progress, 'current': idx, 'total': len(categories_to_scrape)})}\n\n" await asyncio.sleep(0.1) try: - pass - # API call notification - yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Best Seller - Kategori ID: {cat_id}', 'progress': progress})}\n\n" - await asyncio.sleep(0.1) + if path_model: + # New Search API — works for both -c and -s categories + yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Search - {path_model}', 'progress': progress})}\n\n" + await asyncio.sleep(0.1) - scraper = TrendyolScraper(cat_id) - products = scraper.fetch_all_products() + scraper = TrendyolSearchScraper(path_model) + products = await asyncio.get_event_loop().run_in_executor(None, scraper.fetch_all_products) + + # Search API socialProofs boş döner — Top Rankings API'den zenginleştir + if products and cat_id and not any(p.get("socialProofs") for p in products): + try: + top_scraper = TrendyolScraper(cat_id, page_size=20) + top_products = await asyncio.get_event_loop().run_in_executor( + None, lambda: top_scraper.fetch_all_products(delay=0.5, max_pages=5) + ) + if top_products: + # ID bazlı socialProofs eşleştirme + social_map = {} + for tp in top_products: + tid = tp.get("id") or tp.get("contentId") + sp = tp.get("socialProofs", []) + if tid and sp: + 
social_map[int(tid)] = sp + if social_map: + for p in products: + pid = p.get("id") + if pid and int(pid) in social_map: + p["socialProofs"] = social_map[int(pid)] + log_sse.info(f"Enriched {len(social_map)} products with socialProofs from Top Rankings API") + except Exception as e: + log_sse.warning(f"Top Rankings socialProofs enrichment failed: {e}") + + elif cat_id: + # Legacy fallback — old top-rankings API + yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Best Seller - Kategori ID: {cat_id}', 'progress': progress})}\n\n" + await asyncio.sleep(0.1) + + scraper = TrendyolScraper(cat_id) + products = await asyncio.get_event_loop().run_in_executor(None, scraper.fetch_all_products) + else: + products = [] if products: - pass - # Save to file + # Save to file — use cat_id if available, else derive from path_model os.makedirs(CATEGORIES_DIR, exist_ok=True) - filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json" + file_id = cat_id if cat_id else path_model.replace("/", "_") + filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{file_id}.json" data = { "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "total_products": len(products), "scraped_at": datetime.now().isoformat(), @@ -1550,6 +1697,7 @@ async def create_report( results["total_products"] += len(products) results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": True, "total_products": len(products), @@ -1562,6 +1710,7 @@ async def create_report( results["failed"] += 1 results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": False, "total_products": 0, @@ -1574,6 +1723,7 @@ async def create_report( results["failed"] += 1 results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": False, "total_products": 0, @@ -1585,127 +1735,8 @@ async def create_report( 
# Rate limiting (non-blocking) await asyncio.sleep(1.5) - # ============================================ - # Sosyal Kanıt Verilerini Topla - # ============================================ - # print(f"\n🔍 DEBUG: Sosyal kanıt toplama bölümüne ulaşıldı") - # print(f"🔍 DEBUG: results['details'] uzunluğu: {len(results.get('details', []))}") - - yield f"data: {json_module.dumps({'type': 'info', 'message': '📊 Sosyal kanıt verileri toplanıyor...', 'progress': 85})}\n\n" - await asyncio.sleep(0.5) - - # Collect all product IDs from scraped data AND product info - all_product_ids = [] - product_info_map = {} # Map product_id to product info (name, image, url, category) - for detail in results["details"]: - if detail["success"] and detail["file_path"]: - category_name = detail.get("category_name", "Bilinmeyen Kategori") - try: - cat_data = await asyncio.to_thread(_read_json, detail["file_path"]) - products = cat_data.get("products", []) - # print(f"🔍 DEBUG: {detail['file_path']} dosyasından {len(products)} ürün bulundu") - for product in products: - product_id = product.get("id") - if product_id: - all_product_ids.append(int(product_id)) - # Extract rating data - rating_score_obj = product.get("ratingScore", {}) - rating = rating_score_obj.get("averageRating", 0) if isinstance(rating_score_obj, dict) else 0 - rating_count = rating_score_obj.get("totalCount", 0) if isinstance(rating_score_obj, dict) else 0 - - # Extract barcode from first variant - barcode = "" - merchant_listings = product.get("merchantListings", []) - if merchant_listings and len(merchant_listings) > 0: - variants = merchant_listings[0].get("variants", []) - if variants and len(variants) > 0: - barcode = variants[0].get("barcode", "") - - # Store product info with category, brand, price, rating, and barcode - product_info_map[str(product_id)] = { - "name": product.get("name", ""), - "imageUrl": product.get("imageUrl", ""), - "url": product.get("url", ""), - "category": category_name, - "brand": 
product.get("brand", {}).get("name", "Bilinmeyen Marka"), - "price": product.get("price", {}).get("sellingPrice", 0), - "rating": round(rating, 2) if rating else 0, - "rating_count": rating_count, - "barcode": barcode, - "barcode_country": get_country_from_barcode(barcode), # Extract country from barcode prefix - "origin_country": "Bilinmeyen" # Not available in product data - } - except Exception as e: - pass - # print(f"⚠️ DEBUG: Dosya okuma hatası {detail['file_path']}: {str(e)}") - pass - - # Collect social proof data in batches - social_proof_data = {} - total_products = len(all_product_ids) - processed = 0 - batch_size = 20 - - # print(f"🔍 DEBUG: Toplam {total_products} ürün ID'si toplandı") - # print(f"🔍 DEBUG: İlk 5 ürün ID'si: {all_product_ids[:5] if all_product_ids else 'YOK'}") - - if total_products > 0: - pass - # print(f"✅ DEBUG: total_products > 0 koşulu sağlandı, sosyal kanıt toplama başlıyor") - for chunk in _chunked(all_product_ids, batch_size): - try: - pass - # print(f"🔍 DEBUG: {len(chunk)} ürün için sosyal kanıt API'ye istek gönderiliyor: {chunk}") - data = fetch_social_proof(chunk) - # print(f"🔍 DEBUG: API yanıtı alındı: {type(data)}, 'result' var mı: {'result' in data if data else False}") - if data and "result" in data: - items = data.get("result", []) - # print(f"🔍 DEBUG: {len(items)} adet sonuç bulundu") - for item in items: - pid = item.get("contentId") - if pid: - pid_str = str(pid) - # Get product info from map - product_info = product_info_map.get(pid_str, {}) - social_proof_data[pid_str] = { - "page_views": item.get("pageViewCount", 0), - "orders": item.get("orderCount", 0), - "baskets": item.get("basketCount", 0), - "favorites": item.get("favoriteCount", 0), - "name": product_info.get("name", ""), - "imageUrl": product_info.get("imageUrl", ""), - "url": product_info.get("url", ""), - "category": product_info.get("category", "Bilinmeyen Kategori"), - "brand": product_info.get("brand", "Bilinmeyen Marka"), - "price": 
product_info.get("price", 0), - "rating": product_info.get("rating", 0), - "rating_count": product_info.get("rating_count", 0), - "barcode": product_info.get("barcode", ""), - "origin_country": product_info.get("origin_country", "Bilinmeyen") - } - except Exception as e: - pass - # print(f"❌ DEBUG: Sosyal kanıt API hatası: {str(e)}") - pass - - processed += len(chunk) - progress_pct = int((processed / total_products) * 5) + 85 # 85-90% - yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 Sosyal kanıt: {processed}/{total_products} ürün', 'progress': progress_pct})}\n\n" - # SSE keepalive heartbeat every 10 batches - if processed % (batch_size * 10) == 0: - yield f": heartbeat\n\n" - await asyncio.sleep(0.3) # Rate limiting (non-blocking) - - # print(f"✅ DEBUG: Sosyal kanıt toplama tamamlandı. Toplanan veri: {len(social_proof_data)} ürün") - yield f"data: {json_module.dumps({'type': 'success', 'message': f'✅ Sosyal kanıt verileri toplandı ({len(social_proof_data)} ürün)', 'progress': 90})}\n\n" - await asyncio.sleep(0.3) - else: - pass - # print(f"⚠️ DEBUG: total_products = 0, sosyal kanıt toplanmadı") - pass - # Generate report file - yield f"data: {json_module.dumps({'type': 'info', 'message': '📝 Rapor dosyası oluşturuluyor...', 'progress': 92})}\n\n" + yield f"data: {json_module.dumps({'type': 'info', 'message': '📝 Rapor dosyası oluşturuluyor...', 'progress': 88})}\n\n" await asyncio.sleep(0.5) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -1727,7 +1758,7 @@ async def create_report( await asyncio.to_thread(_write_json, json_filename, combined_data) # Save to database - yield f"data: {json_module.dumps({'type': 'info', 'message': '💾 Veritabanına kaydediliyor...', 'progress': 95})}\n\n" + yield f"data: {json_module.dumps({'type': 'info', 'message': '💾 Veritabanına kaydediliyor...', 'progress': 93})}\n\n" await asyncio.sleep(0.5) new_report = Report( @@ -1742,38 +1773,64 @@ async def create_report( await asyncio.to_thread(_db_save, db, 
new_report) - # Save social proof data to persistent cache - # print(f"\n🔍 DEBUG: Sosyal kanıt kaydetme bölümü - social_proof_data uzunluğu: {len(social_proof_data)}") - if social_proof_data: - enrich_dir = f"{REPORTS_DIR}/enrich_{new_report.id}" - os.makedirs(enrich_dir, exist_ok=True) - social_file = f"{enrich_dir}/social.json" + # Start enrichment in background thread (survives client disconnect) + import threading + report_id_for_enrich = new_report.id + enrichment_progress[report_id_for_enrich] = {"status": "queued", "step": "queued"} + threading.Thread( + target=_enrich_report_task, + args=(report_id_for_enrich,), + daemon=True + ).start() + log_sse.info(f"Background enrichment started for report {report_id_for_enrich}") - social_output = { - "products": len(all_product_ids), - "total": { - "page_views": sum(d.get("page_views", 0) for d in social_proof_data.values()), - "orders": sum(d.get("orders", 0) for d in social_proof_data.values()), - "baskets": sum(d.get("baskets", 0) for d in social_proof_data.values()), - "favorites": sum(d.get("favorites", 0) for d in social_proof_data.values()) - }, - "missing": total_products - len(social_proof_data), - "details": social_proof_data - } + # Wait for enrichment to complete, sending progress updates via SSE + yield f"data: {json_module.dumps({'type': 'info', 'message': '📊 Sosyal kanıt verileri toplanıyor...', 'progress': 90})}\n\n" + await asyncio.sleep(0.5) - # print(f"✅ DEBUG: Sosyal kanıt dosyası kaydediliyor: {social_file}") - # print(f"🔍 DEBUG: Toplam metrikler: {social_output['total']}") - await asyncio.to_thread(_write_json, social_file, social_output) - # print(f"✅ DEBUG: Sosyal kanıt dosyası başarıyla kaydedildi") + progress_key = f"social_{report_id_for_enrich}" + max_wait = 600 # 10 dakika max + waited = 0 + while waited < max_wait: + # Check enrichment task status + enrich_status = enrichment_progress.get(report_id_for_enrich) or {} + if enrich_status.get("status") in ("completed", "error"): + break + + 
# Check social proof progress + social_progress = enrichment_progress.get(progress_key) or {} + sp_processed = social_progress.get("processed", 0) + sp_total = social_progress.get("total", 0) + sp_pct = social_progress.get("progress", 0) + + if sp_total > 0: + overall_pct = 90 + int(sp_pct * 0.09) # 90-99 arası + yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 Sosyal kanıt: {sp_processed}/{sp_total} ürün (%{sp_pct})', 'progress': overall_pct})}\n\n" + + await asyncio.sleep(3) + waited += 3 + + # Final status check + enrich_status = enrichment_progress.get(report_id_for_enrich) or {} + if enrich_status.get("status") == "completed": + yield f"data: {json_module.dumps({'type': 'info', 'message': '✅ Sosyal kanıt tamamlandı!', 'progress': 99})}\n\n" + elif enrich_status.get("status") == "error": + err_msg = str(enrich_status.get("error", ""))[:100] + yield f"data: {json_module.dumps({'type': 'warning', 'message': f'⚠️ Sosyal kanıt hatası: {err_msg}', 'progress': 99})}\n\n" else: - pass - # print(f"⚠️ DEBUG: social_proof_data boş, dosya kaydedilmedi") + yield f"data: {json_module.dumps({'type': 'warning', 'message': '⚠️ Sosyal kanıt zaman aşımı, arka planda devam ediyor...', 'progress': 99})}\n\n" - # Final success message with report ID - yield f"data: {json_module.dumps({'type': 'complete', 'message': '✅ Rapor başarıyla oluşturuldu!', 'progress': 100, 'report_id': new_report.id, 'total_products': results['total_products'], 'successful': results['successful']})}\n\n" await asyncio.sleep(0.1) + # Final success message with report ID + yield f"data: {json_module.dumps({'type': 'complete', 'message': '✅ Rapor başarıyla oluşturuldu!', 'progress': 100, 'report_id': new_report.id, 'total_products': results['total_products'], 'successful': results['successful'], 'enrichment_status': enrich_status.get('status', 'unknown')})}\n\n" + await asyncio.sleep(0.1) + + except asyncio.CancelledError: + log_sse.warning(f"SSE stream cancelled (client disconnect): 
task={task_id}") + return except Exception as e: + log_sse.error(f"SSE stream error: task={task_id}, error={e}", exc_info=True) yield f"data: {json_module.dumps({'type': 'error', 'message': f'❌ Kritik hata: {str(e)}', 'progress': -1})}\n\n" return StreamingResponse(progress_stream(), media_type="text/event-stream") @@ -1818,7 +1875,7 @@ def get_scraping_progress(task_id: str): # Background task for scraping def scrape_in_background(task_id: str, report_name: str, category_id: int, categories_to_scrape: list, category_name: str): """Background task that handles scraping with progress updates""" - from scraper import TrendyolScraper + from scraper import TrendyolSearchScraper, TrendyolScraper import json import os from datetime import datetime @@ -1858,7 +1915,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ "details": [] } - for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1): + for idx, (path_model, cat_name, cat_id) in enumerate(categories_to_scrape, 1): scraping_progress[task_id]["current"] = idx scraping_progress[task_id]["current_category"] = cat_name scraping_progress[task_id]["progress"] = int((idx / len(categories_to_scrape)) * 80) + 10 @@ -1866,17 +1923,27 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ add_log(f"🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...") try: - scraper = TrendyolScraper(cat_id) + if path_model: + scraper = TrendyolSearchScraper(path_model) + elif cat_id: + scraper = TrendyolScraper(cat_id) + else: + add_log(f"⚠️ {cat_name} - path_model veya cat_id yok, atlanıyor", "warning") + results["failed"] += 1 + scraping_progress[task_id]["failed"] += 1 + continue + products = scraper.fetch_all_products() if products: - pass # Save to file os.makedirs(CATEGORIES_DIR, exist_ok=True) - filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json" + file_id = cat_id if cat_id else path_model.replace("/", "_") + filename = 
f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{file_id}.json" data = { "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "total_products": len(products), "scraped_at": datetime.now().isoformat(), @@ -1890,6 +1957,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ results["total_products"] += len(products) results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": True, "total_products": len(products), @@ -1904,6 +1972,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ results["failed"] += 1 results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": False, "total_products": 0, @@ -1916,6 +1985,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ results["failed"] += 1 results["details"].append({ "category_id": cat_id, + "path_model": path_model, "category_name": cat_name, "success": False, "total_products": 0, @@ -2038,1114 +2108,30 @@ DASHBOARD_CACHE_TTL = 3600 # 1 hour in seconds @app.get("/api/reports/{report_id}/dashboard-data") def get_dashboard_data(report_id: int, db: Session = Depends(get_db)): """ - Process report data and return dashboard KPIs and chart data (with caching) + Dashboard verisi döndür — konsolide dosya varsa oku, yoksa yerinde oluştur. 
""" - import json - import os - from collections import defaultdict + from data_consolidator import load_consolidated_report, build_consolidated_report, save_consolidated_report - # Check cache first - cache_key = f"dashboard_{report_id}" - # TEMPORARILY DISABLED FOR DEBUGGING - Re-enable after fixing category sales - # if cache_key in dashboard_cache: - # cached_data, cached_time = dashboard_cache[cache_key] - # if time.time() - cached_time < DASHBOARD_CACHE_TTL: - # print(f"📊 Cache hit for report {report_id}") - # return cached_data - print(f"📊 Cache bypassed for debugging - recalculating dashboard data for report {report_id}") + # 1. Konsolide dosyayı oku (hızlı yol) + data = load_consolidated_report(report_id, REPORTS_DIR) + if data: + log_cache.info(f"Konsolide dosyadan yüklendi: report {report_id}") + return data - # Get report from database + # 2. Eski raporlar için fallback: yerinde oluştur ve kaydet (lazy migration) + log_cache.info(f"Konsolide dosya yok, oluşturuluyor: report {report_id}") report = db.query(Report).filter(Report.id == report_id).first() if not report: raise HTTPException(status_code=404, detail="Report not found") - - # Read report JSON file if not report.json_file_path or not os.path.exists(report.json_file_path): raise HTTPException(status_code=404, detail="Report data file not found") - try: - with open(report.json_file_path, 'r', encoding='utf-8') as f: - report_data = json.load(f) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Error reading report file: {str(e)}") + data = build_consolidated_report(report_id, db, REPORTS_DIR) + if not data: + raise HTTPException(status_code=500, detail="Failed to build consolidated report") - # Load all product data from category files - all_products = [] - categories_data = defaultdict(list) - brands_data = defaultdict(int) - - for detail in report_data.get("details", []): - if detail.get("success") and detail.get("file_path"): - file_path = detail["file_path"] - if 
os.path.exists(file_path): - try: - with open(file_path, 'r', encoding='utf-8') as f: - cat_data = json.load(f) - products = cat_data.get("products", []) - - # Enrich products with category name from report details - # Clean category name: remove trailing ID pattern (e.g., "Android Cep Telefonu 164461" → "Android Cep Telefonu") - cat_name_raw = detail["category_name"] - # Remove trailing space + numbers pattern - cat_name = re.sub(r'\s+\d+$', '', cat_name_raw) - - for product in products: - # Update category field with actual name - if isinstance(product.get("category"), dict): - product["category"]["name"] = cat_name - else: - product["category"] = {"id": 0, "name": cat_name} - - all_products.extend(products) - - # Group by category - categories_data[cat_name].extend(products) - - # Count brands - for product in products: - brand_name = product.get("brand", {}).get("name", "Unknown") - brands_data[brand_name] += 1 - except: - continue - - # Calculate KPIs - total_products = len(all_products) - total_brands = len(brands_data) - - # Price calculations - prices = [p.get("price", {}).get("sellingPrice", 0) for p in all_products if p.get("price", {}).get("sellingPrice")] - avg_price = sum(prices) / len(prices) if prices else 0 - min_price = min(prices) if prices else 0 - max_price = max(prices) if prices else 0 - - # DISABLED: Discount calculations (not needed per user request) - # discounted_count = sum(1 for p in all_products if p.get("price", {}).get("discountedPrice") and p.get("price", {}).get("discountedPrice") < p.get("price", {}).get("originalPrice", 0)) - # discount_rate = (discounted_count / total_products * 100) if total_products > 0 else 0 - - # DISABLED: Stock calculations (not needed per user request) - # in_stock_count = sum(1 for p in all_products if p.get("inStock", False)) - # out_of_stock_count = total_products - in_stock_count - # running_out_count = sum(1 for p in all_products if p.get("isRunningOut", False)) - - # Rating calculations - ratings = 
[] - for p in all_products: - rating = p.get("rating", 0) - # Handle if rating is a dict (ratingScore) - if isinstance(rating, dict): - rating = rating.get("averageRating", 0) - if rating: - ratings.append(rating) - avg_rating = sum(ratings) / len(ratings) if ratings else 0 - - # DISABLED: Flash products and discount calculations (not needed per user request) - # flash_count = sum(1 for p in all_products if p.get("isFlash", False)) - - # Advanced KPIs - # DISABLED: 1. Discount Depth (average discount percentage for discounted products) - # discount_depths = [] - # for p in all_products: - # original = p.get("price", {}).get("originalPrice", 0) - # discounted = p.get("price", {}).get("discountedPrice", 0) - # if original > 0 and discounted > 0 and discounted < original: - # discount_depths.append((original - discounted) / original * 100) - # avg_discount_depth = sum(discount_depths) / len(discount_depths) if discount_depths else 0 - - # 2. Median Price (for price premium index calculation) - DOĞRU HESAPLAMA - median_price = float(np.percentile(prices, 50)) if prices else 0 - - # DISABLED: 3. Stock Risk Metric (running_out / in_stock ratio) - not needed per user request - # stock_risk = (running_out_count / in_stock_count * 100) if in_stock_count > 0 else 0 - - # 4. 
Low Rating Products Count (rating < 3.0) - low_rating_count = sum(1 for r in ratings if r < 3.0) - low_rating_rate = (low_rating_count / len(ratings) * 100) if ratings else 0 - - # KPIs - kpis = { - "total_products": total_products, - "total_subcategories": report.total_subcategories, - "total_brands": total_brands, - "avg_price": round(avg_price, 2), - "median_price": round(median_price, 2), - # DISABLED: Discount-related KPIs (not needed per user request) - # "discounted_products": discounted_count, - # "discount_rate": round(discount_rate, 2), - # "avg_discount_depth": round(avg_discount_depth, 2), - # DISABLED: Stock-related KPIs (not needed per user request) - # "out_of_stock": out_of_stock_count, - # "in_stock": in_stock_count, - # "running_out": running_out_count, - # "stock_risk": round(stock_risk, 2), - "avg_rating": round(avg_rating, 2), - "low_rating_count": low_rating_count, - "low_rating_rate": round(low_rating_rate, 2), - # DISABLED: Flash products (not needed per user request) - # "flash_products": flash_count, - "min_price": round(min_price, 2), - "max_price": round(max_price, 2) - } - - # Price distribution (for bar chart) - price_ranges = { - "0-100": 0, - "100-250": 0, - "250-500": 0, - "500-1000": 0, - "1000+": 0 - } - for price in prices: - if price < 100: - price_ranges["0-100"] += 1 - elif price < 250: - price_ranges["100-250"] += 1 - elif price < 500: - price_ranges["250-500"] += 1 - elif price < 1000: - price_ranges["500-1000"] += 1 - else: - price_ranges["1000+"] += 1 - - # Top 10 categories by sales (orders from social proof data) - # First, try to get social proof data to calculate by sales - category_sales = {} - try: - pass - # Try to get social proof data - check for different batch sizes - # The social proof cache uses format: {report_id}:b{batch_size} - # Try common batch sizes: 100, 5 (default), 10, 20 - social_data = None - for batch_size in [100, 5, 10, 20]: - social_cache_key = f"{report_id}:b{batch_size}" - if social_cache_key 
in social_proof_cache: - social_data = social_proof_cache.get(social_cache_key) - if social_data: - pass - # print(f"[DEBUG] Found social proof cache with batch_size={batch_size}") - break - - # If not in cache, try loading from persisted JSON - if not social_data: - pass - # print(f"[DEBUG] No social proof cache found, trying persisted JSON") - persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json") - if persisted: - social_data = { - "details": persisted.get("details", {}), - "aggregation": { - "products": persisted.get("products", 0), - "total": persisted.get("total", {}), - "missing": persisted.get("missing", 0) - } - } - # Cache it for future use with batch_size=5 (default) - social_proof_cache.set(f"{report_id}:b5", social_data) - # print(f"[DEBUG] Loaded social proof data from JSON with {len(social_data['details'])} products") - else: - pass - # print(f"[DEBUG] No persisted social proof JSON found for report {report_id}") - - if social_data: - social_details = social_data.get("details", {}) - # print(f"[DEBUG] Found social data with {len(social_details)} products") - - # Calculate sales per category - for cat_name, cat_products in categories_data.items(): - total_orders = 0 - for product in cat_products: - pid = product.get("id") - if pid and str(pid) in social_details: - product_orders = social_details[str(pid)].get("orders", 0) - total_orders += product_orders - if product_orders > 0: - pass - # print(f"[DEBUG] Product {pid} in {cat_name}: {product_orders} orders") - - category_sales[cat_name] = { - "name": cat_name, - "count": len(cat_products), - "total_orders": total_orders - } - # print(f"[DEBUG] Category {cat_name}: {total_orders} total orders from {len(cat_products)} products") - - # Sort by total_orders (sales) - top_categories = sorted( - category_sales.values(), - key=lambda x: x["total_orders"], - reverse=True - )[:20] - # print(f"[DEBUG] Top categories sorted by orders: {[(c['name'], c['total_orders']) for c in 
top_categories[:3]]}") - else: - pass - # Fallback: If no social proof data, sort by product count - top_categories = sorted( - [{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()], - key=lambda x: x["count"], - reverse=True - )[:20] - except Exception as e: - pass - # print(f"[DEBUG] Error calculating category sales: {str(e)}") - import traceback - traceback.print_exc() - # Fallback: If any error, sort by product count - top_categories = sorted( - [{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()], - key=lambda x: x["count"], - reverse=True - )[:20] - - # Top 10 brands by product count - top_brands = sorted( - [{"name": brand, "count": count} for brand, count in brands_data.items()], - key=lambda x: x["count"], - reverse=True - )[:20] - - # DISABLED: Stock status distribution (for pie chart) - not needed per user request - # stock_status = { - # "in_stock": in_stock_count, - # "out_of_stock": out_of_stock_count, - # "running_out": running_out_count - # } - - # Rating distribution - rating_distribution = { - "0-1": 0, - "1-2": 0, - "2-3": 0, - "3-4": 0, - "4-5": 0 - } - for product in all_products: - rating = product.get("rating", 0) - # Handle if rating is a dict (ratingScore) - if isinstance(rating, dict): - rating = rating.get("averageRating", 0) - - if rating < 1: - rating_distribution["0-1"] += 1 - elif rating < 2: - rating_distribution["1-2"] += 1 - elif rating < 3: - rating_distribution["2-3"] += 1 - elif rating < 4: - rating_distribution["3-4"] += 1 - else: - rating_distribution["4-5"] += 1 - - # Boxplot data (brand price statistics) - Top 10 brands - brand_price_stats = [] - for brand_name in [b["name"] for b in top_brands[:10]]: - brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name] - brand_prices = [p.get("price", {}).get("sellingPrice", 0) for p in brand_products if p.get("price", {}).get("sellingPrice")] - - if 
brand_prices and len(brand_prices) >= 4: # En az 4 veri noktası gerekli - # DOĞRU İSTATİSTİK: numpy percentile kullanımı - percentiles = np.percentile(brand_prices, [0, 25, 50, 75, 100]) - brand_price_stats.append({ - "brand": brand_name, - "min": round(float(percentiles[0]), 2), - "q1": round(float(percentiles[1]), 2), - "median": round(float(percentiles[2]), 2), - "q3": round(float(percentiles[3]), 2), - "max": round(float(percentiles[4]), 2), - "count": len(brand_prices) - }) - - # Scatter plot data (price vs rating) - Sample 500 products for performance - scatter_data = [] - sample_size = min(500, len(all_products)) - sampled_products = random.sample(all_products, sample_size) - - for p in sampled_products: - price = p.get("price", {}).get("sellingPrice", 0) - rating = p.get("rating", 0) - if isinstance(rating, dict): - rating = rating.get("averageRating", 0) - - if price > 0 and rating > 0: - scatter_data.append({ - "price": round(price, 2), - "rating": round(rating, 2), - "brand": p.get("brand", {}).get("name", "Unknown"), - "in_stock": p.get("inStock", False) - }) - - # Low rating products (rating < 3.0) - Top 20 - low_rating_products = [] - for p in all_products: - rating = p.get("rating", 0) - if isinstance(rating, dict): - rating = rating.get("averageRating", 0) - - if rating > 0 and rating < 3.0: - low_rating_products.append({ - "name": p.get("name", "Unknown")[:50], - "brand": p.get("brand", {}).get("name", "Unknown"), - "rating": round(rating, 2), - "price": round(p.get("price", {}).get("sellingPrice", 0), 2), - "in_stock": p.get("inStock", False) - }) - - low_rating_products = sorted(low_rating_products, key=lambda x: x["rating"])[:20] - - # Brand strength score (normalized metrics) - brand_strength_scores = [] - for brand_name in [b["name"] for b in top_brands[:10]]: - brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name] - brand_count = len(brand_products) - brand_share = (brand_count / total_products * 100) if 
total_products > 0 else 0 - - # Brand ratings - brand_ratings = [] - for p in brand_products: - rating = p.get("rating", 0) - if isinstance(rating, dict): - rating = rating.get("averageRating", 0) - if rating > 0: - brand_ratings.append(rating) - brand_avg_rating = sum(brand_ratings) / len(brand_ratings) if brand_ratings else 0 - - # Brand stockout rate - brand_out_of_stock = sum(1 for p in brand_products if not p.get("inStock", False)) - brand_stockout_rate = (brand_out_of_stock / brand_count * 100) if brand_count > 0 else 0 - - # Simple strength score: share + rating - stockout_rate - strength_score = brand_share + (brand_avg_rating * 5) - brand_stockout_rate - - brand_strength_scores.append({ - "brand": brand_name, - "share": round(brand_share, 2), - "avg_rating": round(brand_avg_rating, 2), - "stockout_rate": round(brand_stockout_rate, 2), - "strength_score": round(strength_score, 2) - }) - - brand_strength_scores = sorted(brand_strength_scores, key=lambda x: x["strength_score"], reverse=True) - - # Heatmap: Brand × Category Matrix (top 10 brands × top 10 categories) - # Get top 10 brands by product count - top_10_brands = [b["name"] for b in top_brands] - - # Get top 10 categories by product count - top_10_categories = [c["name"] for c in top_categories[:10]] - - # Build matrix: count products for each brand-category combination - heatmap_data = [] - for cat_name in top_10_categories: - cat_products = categories_data.get(cat_name, []) - for brand_name in top_10_brands: - # Count products for this brand-category pair - count = sum(1 for p in cat_products - if p.get("brand", {}).get("name") == brand_name) - - if count > 0: # Only include non-zero combinations - heatmap_data.append({ - "brand": brand_name, - "category": cat_name, - "value": count - }) - - # Anomalies (outlier prices using IQR method) - DOĞRU HESAPLAMA - if len(prices) > 4: - q1, q3 = np.percentile(prices, [25, 75]) - iqr = q3 - q1 - lower_bound = q1 - 1.5 * iqr - upper_bound = q3 + 1.5 * iqr - - 
anomalies = [] - for p in all_products: - price = p.get("price", {}).get("sellingPrice", 0) - if price > 0 and (price < lower_bound or price > upper_bound): - anomalies.append({ - "name": p.get("name", "Unknown")[:50], - "brand": p.get("brand", {}).get("name", "Unknown"), - "price": round(price, 2), - "type": "expensive" if price > upper_bound else "cheap" - }) - - anomalies = sorted(anomalies, key=lambda x: x["price"], reverse=True)[:20] - else: - anomalies = [] - - # Category-based Price Analysis (Price Premium/Discount relative to overall average) - category_price_analysis = [] - overall_avg_price = avg_price # Genel ortalama fiyat - - for cat_name, cat_products in categories_data.items(): - # Her kategorinin ürün fiyatlarını topla - cat_prices = [p.get("price", {}).get("sellingPrice", 0) for p in cat_products - if p.get("price", {}).get("sellingPrice", 0) > 0] - - if cat_prices: - cat_avg_price = sum(cat_prices) / len(cat_prices) - cat_median_price = float(np.percentile(cat_prices, 50)) - - # Fiyat primi hesaplama: (kategori_ort - genel_ort) / genel_ort * 100 - price_premium = ((cat_avg_price - overall_avg_price) / overall_avg_price * 100) if overall_avg_price > 0 else 0 - - category_price_analysis.append({ - "category": cat_name, - "avg_price": round(cat_avg_price, 2), - "median_price": round(cat_median_price, 2), - "price_premium": round(price_premium, 2), - "product_count": len(cat_prices), - "min_price": round(min(cat_prices), 2), - "max_price": round(max(cat_prices), 2) - }) - - # Fiyat primine göre sırala - category_price_analysis_sorted = sorted(category_price_analysis, key=lambda x: x["price_premium"], reverse=True) - - # En pahalı 10 kategori (pozitif prim) - most_expensive_categories = [c for c in category_price_analysis_sorted if c["price_premium"] > 0][:10] - - # En ucuz 10 kategori (negatif prim) - most_affordable_categories = [c for c in category_price_analysis_sorted if c["price_premium"] < 0][-10:] - most_affordable_categories.reverse() # En 
ucuzdan en pahalıya doğru sırala - - # ============================================================================ - # MENŞEİ ÜLKE VE BARKOD ANALİZİ - # ============================================================================ - - # Ülke kodlarını tam isimlere çeviren mapping - COUNTRY_NAMES = { - "TR": "Türkiye", - "CN": "Çin", - "US": "Amerika", - "GB": "İngiltere", - "FR": "Fransa", - "DE": "Almanya", - "IT": "İtalya", - "ES": "İspanya", - "KR": "Güney Kore", - "JP": "Japonya", - "IN": "Hindistan", - "TW": "Tayvan", - "HK": "Hong Kong", - "TH": "Tayland", - "VN": "Vietnam", - "PL": "Polonya", - "CZ": "Çek Cumhuriyeti", - "RO": "Romanya", - "BG": "Bulgaristan", - "GR": "Yunanistan", - "PT": "Portekiz", - "NL": "Hollanda", - "BE": "Belçika", - "CH": "İsviçre", - "AT": "Avusturya", - "SE": "İsveç", - "NO": "Norveç", - "DK": "Danimarka", - "FI": "Finlandiya", - "RU": "Rusya", - "UA": "Ukrayna", - "AE": "Birleşik Arap Emirlikleri", - "SA": "Suudi Arabistan", - "IL": "İsrail", - "EG": "Mısır", - "ZA": "Güney Afrika", - "BR": "Brezilya", - "MX": "Meksika", - "CA": "Kanada", - "AU": "Avustralya", - "NZ": "Yeni Zelanda", - "SG": "Singapur", - "MY": "Malezya", - "ID": "Endonezya", - "PH": "Filipinler", - "PK": "Pakistan", - "BD": "Bangladeş", - "AZ": "Azerbaycan", - } - - # Barkod prefix'lerine göre ülke kodu mapping (EAN-13 standardı) - BARCODE_COUNTRIES = { - # Trendyol Özel Barkodlar (Harfli) - "TYB": "Trendyol (İç Barkod)", - "SGT": "Trendyol Satıcı", - "KPE": "Trendyol Kampanya", - "RTN": "Trendyol İade", - "CDM": "Trendyol Özel", - - # EAN-13 Standart Barkodlar - "00-13": "ABD & Kanada", - "190-199": "Rezerve/Özel Kullanım", - "20-29": "Mağaza İçi Kullanım", - "30-37": "Fransa", - "380": "Bulgaristan", - "383": "Slovenya", - "370": "Litvanya", - "372": "Estonya", - "373": "Moldova", - "375": "Belarus", - "377": "Ermenistan", - "379": "Kazakistan", - "385": "Hırvatistan", - "387": "Bosna Hersek", - "400-440": "Almanya", - "45-49": "Japonya", - "50": "İngiltere", 
- "520-521": "Yunanistan", - "528": "Lübnan", - "529": "Kıbrıs", - "530": "Arnavutluk", - "531": "Makedonya", - "535": "Malta", - "539": "İrlanda", - "54": "Belçika & Lüksemburg", - "560": "Portekiz", - "569": "İzlanda", - "57": "Danimarka", - "590": "Polonya", - "594": "Romanya", - "599": "Macaristan", - "600-601": "Güney Afrika", - "603": "Gana", - "608": "Bahreyn", - "609": "Mauritius", - "611": "Fas", - "613": "Cezayir", - "615": "Nijerya", - "616": "Kenya", - "618": "Fildişi Sahili", - "619": "Tunus", - "621": "Suriye", - "622": "Mısır", - "624": "Libya", - "625": "Ürdün", - "626": "İran", - "627": "Kuveyt", - "628": "Suudi Arabistan", - "629": "BAE", - "630": "Katar", - "631": "Umman", - "64": "Finlandiya", - "690-699": "Çin", - "70": "Norveç", - "710-719": "Rezerve/Özel Kullanım", - "729": "İsrail", - "73": "İsveç", - "740": "Guatemala", - "741": "El Salvador", - "742": "Honduras", - "743": "Nikaragua", - "744": "Kosta Rika", - "745": "Panama", - "746": "Dominik Cumhuriyeti", - "750": "Meksika", - "754-755": "Kanada", - "759": "Venezuela", - "76": "İsviçre", - "770-771": "Kolombiya", - "773": "Uruguay", - "775": "Peru", - "777": "Bolivya", - "779": "Arjantin", - "780": "Şili", - "784": "Paraguay", - "786": "Ekvador", - "789-790": "Brezilya", - "80-83": "İtalya", - "84": "İspanya", - "850": "Küba", - "858": "Slovakya", - "859": "Çek Cumhuriyeti", - "860": "Sırbistan", - "865": "Moğolistan", - "867": "Kuzey Kore", - "868-869": "Türkiye", - "87": "Hollanda", - "880": "Güney Kore", - "884": "Kamboçya", - "885": "Tayland", - "888": "Singapur", - "890": "Hindistan", - "893": "Vietnam", - "896": "Pakistan", - "899": "Endonezya", - "90-91": "Avusturya", - "93": "Avustralya", - "94": "Yeni Zelanda", - "955": "Malezya", - "958": "Makao", - "977": "Süreli Yayınlar (ISSN)", - "978-979": "Kitaplar (ISBN)", - "980": "Para İade Kuponları", - "981-984": "Kuponlar", - "99": "Kuponlar", - } - - # Menşei ülke verilerini topla - origin_countries = [] - barcodes = [] - 
products_with_origin = 0 - products_with_barcode = 0 - - for cat_name, cat_products in categories_data.items(): - for product in cat_products: - # Menşei ülke bilgisini çıkar - merchant_listings = product.get("merchantListings", []) - if merchant_listings and len(merchant_listings) > 0: - custom_values = merchant_listings[0].get("customValues", []) - for cv in custom_values: - if cv.get("key") == "origin": - country_code = cv.get("value", "").upper() - if country_code: - origin_countries.append(country_code) - products_with_origin += 1 - break - - # Barkod bilgisini çıkar - if merchant_listings and len(merchant_listings) > 0: - variants = merchant_listings[0].get("variants", []) - if variants and len(variants) > 0: - barcode = variants[0].get("barcode", "") - if barcode: - barcodes.append(barcode) - products_with_barcode += 1 - - # Menşei ülke analizi - origin_country_counts = {} - for country_code in origin_countries: - origin_country_counts[country_code] = origin_country_counts.get(country_code, 0) + 1 - - # Ülke kodlarını tam isimlere çevir ve sırala - origin_country_data = [] - for code, count in origin_country_counts.items(): - country_name = COUNTRY_NAMES.get(code, f"Diğer ({code})") - percentage = (count / products_with_origin * 100) if products_with_origin > 0 else 0 - origin_country_data.append({ - "country_code": code, - "country_name": country_name, - "product_count": count, - "percentage": round(percentage, 2) - }) - - origin_country_data_sorted = sorted(origin_country_data, key=lambda x: x["product_count"], reverse=True) - - # Barkod prefix analizi (ilk 3 hane) - barcode_prefixes = {} - barcode_countries_detected = {} - - for barcode in barcodes: - if len(barcode) >= 3: - prefix = barcode[:3] - barcode_prefixes[prefix] = barcode_prefixes.get(prefix, 0) + 1 - - # Prefix'ten ülke tespiti - detected_country = "Bilinmiyor" - prefix_num = barcode[:3] - - # Tek prefix kontrolü - for key, country in BARCODE_COUNTRIES.items(): - if "-" in key: - start, end = 
key.split("-") - # Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp) - try: - range_len = len(start) - prefix_to_check = prefix_num[:range_len] if len(prefix_num) >= range_len else prefix_num - prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1 - start_int = int(start) - end_int = int(end) - if prefix_int >= start_int and prefix_int <= end_int: - detected_country = country - break - except ValueError: - continue - elif key == prefix_num[:len(key)]: - detected_country = country - break - - barcode_countries_detected[detected_country] = barcode_countries_detected.get(detected_country, 0) + 1 - - # Barkod prefix'lerini sırala - barcode_prefix_data = [] - for prefix, count in barcode_prefixes.items(): - percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0 - - # Prefix'ten ülke bul - detected_country = "Bilinmiyor" - for key, country in BARCODE_COUNTRIES.items(): - if "-" in key: - start, end = key.split("-") - # Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp) - try: - range_len = len(start) - prefix_to_check = prefix[:range_len] if len(prefix) >= range_len else prefix - prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1 - start_int = int(start) - end_int = int(end) - if prefix_int >= start_int and prefix_int <= end_int: - detected_country = country - break - except ValueError: - continue - elif key == prefix[:len(key)]: - detected_country = country - break - - barcode_prefix_data.append({ - "prefix": prefix, - "detected_country": detected_country, - "product_count": count, - "percentage": round(percentage, 2) - }) - - barcode_prefix_data_sorted = sorted(barcode_prefix_data, key=lambda x: x["product_count"], reverse=True)[:20] - - # Barkoddan tespit edilen ülkeleri sırala - barcode_country_data = [] - for country, count in barcode_countries_detected.items(): - percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0 - 
barcode_country_data.append({ - "country_name": country, - "product_count": count, - "percentage": round(percentage, 2) - }) - - barcode_country_data_sorted = sorted(barcode_country_data, key=lambda x: x["product_count"], reverse=True) - - # ============================================================================ - # SATICI ANALİZİ (MERCHANT ANALYSIS) - # ============================================================================ - - merchants_data = {} # merchant_id -> {total_products, total_price, winner_count} - total_winners = 0 - products_with_merchant = 0 - - for product in all_products: - merchant_listings = product.get("merchantListings", []) - if merchant_listings: - ml = merchant_listings[0] # İlk satıcı - merchant = ml.get("merchant", {}) - merchant_id = merchant.get("id") - - if merchant_id: - products_with_merchant += 1 - - # Satıcı verilerini topla - if merchant_id not in merchants_data: - pass - # Satıcı ismini al, boşsa officialName'i kullan, o da boşsa ID'yi kullan - merchant_name = merchant.get("name") or merchant.get("officialName") or f"Satıcı {merchant_id}" - merchants_data[merchant_id] = { - "merchant_id": merchant_id, - "merchant_name": merchant_name, - "product_count": 0, - "total_price": 0, - "winner_count": 0 - } - - merchants_data[merchant_id]["product_count"] += 1 - - # Fiyat bilgisi - price = product.get("price", {}).get("sellingPrice", 0) - if price > 0: - merchants_data[merchant_id]["total_price"] += price - - # Kazanan satıcı mı? 
- if ml.get("isWinner"): - merchants_data[merchant_id]["winner_count"] += 1 - total_winners += 1 - - # Satıcı listesi oluştur - merchant_list = [] - for merchant_id, data in merchants_data.items(): - avg_price = data["total_price"] / data["product_count"] if data["product_count"] > 0 else 0 - winner_ratio = (data["winner_count"] / data["product_count"] * 100) if data["product_count"] > 0 else 0 - - # Satıcı URL'sini oluştur - merchant_url = f"https://www.trendyol.com/magaza/{data['merchant_name'].lower().replace(' ', '-')}-m-{data['merchant_id']}" if data["merchant_name"] and data["merchant_name"] != f"Satıcı {data['merchant_id']}" else None - - merchant_list.append({ - "merchant_id": data["merchant_id"], - "merchant_name": data["merchant_name"], - "merchant_url": merchant_url, - "product_count": data["product_count"], - "avg_price": round(avg_price, 2), - "winner_count": data["winner_count"], - "winner_ratio": round(winner_ratio, 2) - }) - - # Ürün sayısına göre sırala - merchant_list_sorted = sorted(merchant_list, key=lambda x: x["product_count"], reverse=True) - top_merchants = merchant_list_sorted[:20] - - # Genel satıcı istatistikleri - total_merchants = len(merchants_data) - winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0 - - # ============================================================================ - # STOK MİKTAR ANALİZİ (STOCK QUANTITY ANALYSIS) - # ============================================================================ - - # DISABLED: Stock quantity analysis (not needed per user request) - # stock_quantities = [] - # category_stocks = {} # category -> [quantities] - # products_with_stock_info = 0 - # product_to_category = {} # product_id -> category_name mapping - - # # Önce ürün-kategori eşleşmesini oluştur - # for cat_name, cat_products in categories_data.items(): - # for product in cat_products: - # product_id = product.get("id") - # if product_id: - # product_to_category[product_id] = 
cat_name - - # for product in all_products: - # merchant_listings = product.get("merchantListings", []) - # if merchant_listings: - # ml = merchant_listings[0] - # variants = ml.get("variants", []) - # if variants: - # quantity = variants[0].get("quantity") - # if quantity is not None and quantity > 0: - # stock_quantities.append(quantity) - # products_with_stock_info += 1 - - # # Kategori bazlı stok - mapping'den al - # product_id = product.get("id") - # cat_name = product_to_category.get(product_id, "Diğer") - - # if cat_name not in category_stocks: - # category_stocks[cat_name] = [] - # category_stocks[cat_name].append(quantity) - - # # Stok istatistikleri - # if stock_quantities: - # avg_stock = sum(stock_quantities) / len(stock_quantities) - # median_stock = float(np.percentile(stock_quantities, 50)) - # total_stock = sum(stock_quantities) - # min_stock = min(stock_quantities) - # max_stock = max(stock_quantities) - # else: - # avg_stock = median_stock = total_stock = min_stock = max_stock = 0 - - # # Kategori bazlı stok analizi - # category_stock_analysis = [] - # for cat_name, quantities in category_stocks.items(): - # cat_avg_stock = sum(quantities) / len(quantities) if quantities else 0 - # cat_total_stock = sum(quantities) - - # category_stock_analysis.append({ - # "category": cat_name, - # "avg_stock": round(cat_avg_stock, 2), - # "total_stock": cat_total_stock, - # "product_count": len(quantities), - # "min_stock": min(quantities) if quantities else 0, - # "max_stock": max(quantities) if quantities else 0 - # }) - - # # Toplam stoka göre sırala - # category_stock_sorted = sorted(category_stock_analysis, key=lambda x: x["total_stock"], reverse=True) - - # # Stok dağılımı (binning) - # stock_distribution = { - # "0-100": 0, - # "101-500": 0, - # "501-1000": 0, - # "1001-5000": 0, - # "5000+": 0 - # } - - # for qty in stock_quantities: - # if qty <= 100: - # stock_distribution["0-100"] += 1 - # elif qty <= 500: - # stock_distribution["101-500"] += 1 - # 
elif qty <= 1000: - # stock_distribution["501-1000"] += 1 - # elif qty <= 5000: - # stock_distribution["1001-5000"] += 1 - # else: - # stock_distribution["5000+"] += 1 - - # Basitleştirilmiş ürün listesi (sadece fiyat analizi için) - # Full products data for Overview tab - full_products = [] - - for product in all_products: - price = product.get("price", {}).get("sellingPrice") - category = product.get("categoryName") or product.get("category") - brand = product.get("brand", {}).get("name") or product.get("brandName") or "Bilinmeyen" - - # Extract category name if it's a dict - if isinstance(category, dict): - category_name = category.get("name", "") - else: - category_name = category if category else "" - - # Social proof data (orders, views, baskets, favorites, etc.) - socialProofs is an array - social_proofs = product.get("socialProofs", []) - orders = 0 - page_views = 0 - baskets = 0 - favorites = 0 - - if isinstance(social_proofs, list): - for proof in social_proofs: - proof_type = proof.get("type", "") - value_str = proof.get("value", "0") - - # Parse value (can be string like "208" or "1k") - try: - if "k" in value_str.lower(): - parsed_value = int(float(value_str.lower().replace("k", "")) * 1000) - else: - parsed_value = int(value_str) - except: - parsed_value = 0 - - # Assign to appropriate field - if proof_type == "orderCountL3D": - orders = parsed_value - elif proof_type == "pageViewCount": - page_views = parsed_value - elif proof_type == "basketCount": - baskets = parsed_value - elif proof_type == "favoriteCount": - favorites = parsed_value - - # Product image and URL - images = product.get("images", []) - image_url = images[0] if isinstance(images, list) and len(images) > 0 else "" - - # Trendyol URL - product_url = product.get("url", "") - if not product_url: - content_id = product.get("contentId") or product.get("id") - if content_id: - product_url = f"https://www.trendyol.com/p/{content_id}" - - # Extract barcode from winnerVariant - barcode = "" - 
winner_variant = product.get("winnerVariant", {}) - if isinstance(winner_variant, dict): - barcode = winner_variant.get("barcode", "") - - # Extract country (origin) from merchantListings - country_code = "" - country_name = "Bilinmeyen" # Default value for products without origin data - merchant_listings_temp = product.get("merchantListings", []) - if merchant_listings_temp and len(merchant_listings_temp) > 0: - custom_values = merchant_listings_temp[0].get("customValues", []) - for cv in custom_values: - if cv.get("key") == "origin": - country_code = cv.get("value", "").upper() - country_name = COUNTRY_NAMES.get(country_code, f"Diğer ({country_code})" if country_code else "Bilinmeyen") - break - - # Extract review count - review_count = 0 - try: - review_count = int(product.get("rating_count", 0) or 0) - except: - try: - rating_obj = product.get("rating", {}) - if isinstance(rating_obj, dict): - review_count = int(rating_obj.get("totalComments", 0) or rating_obj.get("totalCount", 0) or 0) - except: - review_count = 0 - - # Extract rating score - rating_score = 0.0 - try: - rating_obj = product.get("rating", {}) - if isinstance(rating_obj, dict): - rating_score = float(rating_obj.get("averageRating", 0) or rating_obj.get("score", 0) or 0) - except: - rating_score = 0.0 - - if price and category_name: - full_products.append({ - "id": product.get("contentId") or product.get("id"), - "name": product.get("name", ""), - "brand": brand, - "price": price, - "category_name": category_name, - "orders": orders, - "page_views": page_views, - "baskets": baskets, # Basket/cart additions - "favorites": favorites, # Wishlist/favorites count - "review_count": review_count, # Review/comment count - "rating": rating_score, # Average rating score (0-5) - "image_url": image_url if image_url else "https://via.placeholder.com/150", - "url": product_url, - "barcode": barcode, # Barcode field added for barcode analysis - "country_code": country_code, # Country code (TR, CN, DE, etc.) 
- "country": country_name # Country name (Türkiye, Çin, Almanya, etc.) - }) - - result = { - "report_id": report_id, - "report_name": report.name, - "kpis": kpis, - "all_products": full_products, # Full product data with social proof, images, URLs - "charts": { - "price_distribution": price_ranges, - "top_categories": top_categories, - "top_brands": top_brands, - # DISABLED: "stock_status": stock_status, # Not needed per user request - "rating_distribution": rating_distribution, - "brand_price_boxplot": brand_price_stats, - "price_rating_scatter": scatter_data, - "brand_strength": brand_strength_scores, - "brand_category_heatmap": heatmap_data, - "category_price_premium": { - "all_categories": category_price_analysis_sorted, - "most_expensive": most_expensive_categories, - "most_affordable": most_affordable_categories - }, - "origin_analysis": { - "countries": origin_country_data_sorted, - "top_countries": origin_country_data_sorted[:10], - "total_products_with_origin": products_with_origin, - "coverage_percentage": round((products_with_origin / total_products * 100), 2) if total_products > 0 else 0 - }, - "barcode_analysis": { - "prefixes": barcode_prefix_data_sorted, - "countries_from_barcode": barcode_country_data_sorted, - "top_countries_from_barcode": barcode_country_data_sorted[:10], - "total_products_with_barcode": products_with_barcode, - "coverage_percentage": round((products_with_barcode / total_products * 100), 2) if total_products > 0 else 0 - }, - "merchant_analysis": { - "merchants": merchant_list_sorted, - "top_merchants": top_merchants, - "total_merchants": total_merchants, - "total_products_with_merchant": products_with_merchant, - "total_winners": total_winners, - "winner_percentage": round(winner_percentage, 2), - "coverage_percentage": round((products_with_merchant / total_products * 100), 2) if total_products > 0 else 0 - } - # DISABLED: Stock quantity analysis (not needed per user request) - # "stock_analysis": { - # "avg_stock": 
round(avg_stock, 2), - # "median_stock": round(median_stock, 2), - # "total_stock": total_stock, - # "min_stock": min_stock, - # "max_stock": max_stock, - # "products_with_stock_info": products_with_stock_info, - # "coverage_percentage": round((products_with_stock_info / total_products * 100), 2) if total_products > 0 else 0, - # "distribution": stock_distribution, - # "category_stocks": category_stock_sorted, - # "top_stocked_categories": category_stock_sorted[:10] - # } - }, - "insights": { - "low_rating_products": low_rating_products, - "anomalies": anomalies - } - } - - # Cache the result for 1 hour - dashboard_cache[cache_key] = (result, time.time()) - print(f"📊 Cached dashboard data for report {report_id}") - - return result + save_consolidated_report(report_id, data, REPORTS_DIR) + return data # ============================================================================ @@ -3401,7 +2387,7 @@ def social_proof(report_id: int, refresh: bool = False, batch_size: int = 5, db: return result except Exception as e: - pass + log_api.error(f"Enrichment failed for report: {e}", exc_info=True) # Mark as failed enrichment_progress.set(progress_key, { "status": "failed", @@ -3468,8 +2454,19 @@ def sales_analytics(report_id: int): # Return top products by orders top_by_orders = sorted(enriched_products, key=lambda x: x.get("orders", 0), reverse=True)[:20] + # Aggregate totals for sales funnel + total_views = sum(p.get("page_views", 0) for p in enriched_products) + total_baskets = sum(p.get("baskets", 0) for p in enriched_products) + total_orders = sum(p.get("orders", 0) for p in enriched_products) + return { - "top_products_by_orders": top_by_orders + "top_products_by_orders": top_by_orders, + "total_views": total_views, + "total_baskets": total_baskets, + "total_orders": total_orders, + "view_to_basket_rate": round((total_baskets / total_views * 100), 2) if total_views > 0 else 0, + "basket_to_order_rate": round((total_orders / total_baskets * 100), 2) if total_baskets > 
0 else 0, + "view_to_order_rate": round((total_orders / total_views * 100), 2) if total_views > 0 else 0, } except Exception as e: @@ -3630,26 +2627,24 @@ def keyword_analysis( Returns: Keyword analiz sonuçları """ - # print(f"🔍 ========== KEYWORD ANALYSIS REQUEST ==========") - print(f"📋 Report ID: {report_id}") - print(f"⚙️ Parameters: min_frequency={min_frequency}, min_length={min_length}, word_count={min_word_count}-{max_word_count}, top_n={top_n}, category_filter={category_filter}") + log_keywords.info(f"Keyword analysis: report={report_id}, min_freq={min_frequency}, word_count={min_word_count}-{max_word_count}, top_n={top_n}") try: - print(f"📦 Ürünler yükleniyor...") + log_keywords.info("Ürünler yükleniyor...") # Load products all_products, categories_data = load_report_products(db, report_id) - print(f"✅ {len(all_products) if all_products else 0} ürün yüklendi") + log_keywords.info(f"{len(all_products) if all_products else 0} ürün yüklendi") if not all_products: - print(f"⚠️ Rapor için ürün bulunamadı!") + log_keywords.warning("Rapor için ürün bulunamadı!") return {"error": "No products found for this report"} # Load social proof data - print(f"📊 Social proof data yükleniyor...") + log_keywords.info("Social proof data yükleniyor...") social_json_path = os.path.join(REPORTS_DIR, f"enrich_{report_id}", "social.json") social_data = _load_json(social_json_path) social_details = social_data.get("details", {}) if social_data else {} - print(f"✅ Social proof data yüklendi: {len(social_details)} ürün (path: {social_json_path})") + log_keywords.info(f"Social proof data yüklendi: {len(social_details)} ürün") # Filter by category if specified if category_filter: @@ -3659,7 +2654,7 @@ def keyword_analysis( ] # Step 1: Extract keywords from all product names (OPTIMIZED) - print(f"🔤 Keyword extraction başlatılıyor... ({len(all_products)} ürün)") + log_keywords.info(f"Keyword extraction başlatılıyor... 
({len(all_products)} ürün)") keyword_to_products = {} # {keyword: [product_ids]} product_keywords_map = {} # {product_id: [keywords]} @@ -3712,13 +2707,13 @@ def keyword_analysis( elapsed = time.time() - start_time rate = processed_count / elapsed if elapsed > 0 else 0 remaining = (len(all_products) - processed_count) / rate if rate > 0 else 0 - print(f"⏳ İşlenen ürün: {processed_count}/{len(all_products)} ({rate:.0f} ürün/sn, ~{remaining:.0f}s kaldı)") + log_keywords.info(f"İşlenen ürün: {processed_count}/{len(all_products)} ({rate:.0f} ürün/sn, ~{remaining:.0f}s kaldı)") elapsed_total = time.time() - start_time - print(f"✅ Keyword extraction tamamlandı: {len(keyword_to_products)} unique keyword bulundu ({elapsed_total:.2f}s)") + log_keywords.info(f"Keyword extraction tamamlandı: {len(keyword_to_products)} unique keyword ({elapsed_total:.2f}s)") # Step 2: Separate rare keywords (frequency 1-2) and common keywords (>= min_frequency) - print(f"🔍 Keyword ayrıştırma: rare (1-2) vs common (>={min_frequency})") + log_keywords.info(f"Keyword ayrıştırma: rare (1-2) vs common (>={min_frequency})") rare_keywords = { kw: product_ids for kw, product_ids in keyword_to_products.items() @@ -3729,10 +2724,10 @@ def keyword_analysis( for kw, product_ids in keyword_to_products.items() if len(product_ids) >= min_frequency } - print(f"✅ Rare keywords: {len(rare_keywords)} | Common keywords: {len(filtered_keywords)}") + log_keywords.info(f"Rare keywords: {len(rare_keywords)} | Common keywords: {len(filtered_keywords)}") # Step 3: Calculate metrics for each keyword (OPTIMIZED) - print(f"📊 Metrikler hesaplanıyor... ({len(filtered_keywords)} keyword)") + log_keywords.info(f"Metrikler hesaplanıyor... 
({len(filtered_keywords)} keyword)") keyword_metrics = [] # Create product lookup dict for faster access @@ -3823,8 +2818,8 @@ def keyword_analysis( "views": views, "orders": orders, "reviews": review_count, - "price": product.get("price", {}).get("sellingPrice", 0) if isinstance(product.get("price"), dict) else 0, - "image_url": product.get("images", [])[0] if product.get("images") else "https://via.placeholder.com/150", + "price": _extract_price(product), + "image_url": product.get("imageUrl", "") or (product.get("images", [])[0] if product.get("images") else "https://via.placeholder.com/150"), "url": product.get("url", "") or f"https://www.trendyol.com/p/{pid}" }) @@ -3893,10 +2888,10 @@ def keyword_analysis( elapsed_metric = time.time() - metric_start_time rate = metric_count / elapsed_metric if elapsed_metric > 0 else 0 remaining = (len(filtered_keywords) - metric_count) / rate if rate > 0 else 0 - print(f"⏳ İşlenen keyword: {metric_count}/{len(filtered_keywords)} ({rate:.1f} keyword/sn, ~{remaining:.0f}s kaldı)") + log_keywords.info(f"İşlenen keyword: {metric_count}/{len(filtered_keywords)} ({rate:.1f} keyword/sn, ~{remaining:.0f}s kaldı)") metric_elapsed = time.time() - metric_start_time - print(f"✅ Metrikler hesaplandı: {len(keyword_metrics)} keyword ({metric_elapsed:.2f}s)") + log_keywords.info(f"Metrikler hesaplandı: {len(keyword_metrics)} keyword ({metric_elapsed:.2f}s)") # Step 4: Apply advanced filters # print(f"🔍 Gelişmiş filtreler uygulanıyor...") @@ -3968,10 +2963,10 @@ def keyword_analysis( kw["potential_score"] = round(potential, 2) filtered_metrics = [kw for kw in filtered_metrics if kw.get("potential_score", 0) >= min_potential_score] - print(f"✅ Filtreleme sonrası: {len(filtered_metrics)} keyword kaldı") - + log_keywords.info(f"Filtreleme sonrası: {len(filtered_metrics)} keyword kaldı") + # Step 5: Sort by selected criteria - print(f"📈 Sıralama yapılıyor: {sort_by} ({sort_order})...") + log_keywords.info(f"Sıralama yapılıyor: {sort_by} 
({sort_order})...") reverse_order = sort_order == "desc" if sort_by == "frequency": @@ -4005,10 +3000,10 @@ def keyword_analysis( # Get paginated keywords paginated_keywords = filtered_metrics[start_index:end_index] - print(f"✅ Sayfa {page}/{total_pages} - {len(paginated_keywords)} keyword seçildi (toplam: {total_keywords})") + log_keywords.info(f"Sayfa {page}/{total_pages} - {len(paginated_keywords)} keyword (toplam: {total_keywords})") # Step 6: Process rare keywords (frequency 1-2) - Limited to top 100 for performance - print(f"📊 Rare keywords işleniyor... ({len(rare_keywords)} keyword)") + log_keywords.info(f"Rare keywords işleniyor... ({len(rare_keywords)} keyword)") rare_metrics = [] rare_count = 0 for keyword, product_ids in rare_keywords.items(): @@ -4047,7 +3042,7 @@ def keyword_analysis( # Sort rare keywords by orders (most promising first) rare_metrics.sort(key=lambda x: x["performance"]["total_orders"], reverse=True) - print(f"✅ Rare keywords işlendi: {len(rare_metrics)} keyword (top 100)") + log_keywords.info(f"Rare keywords işlendi: {len(rare_metrics)} keyword (top 100)") # Step 7: Build category × keyword matrix category_keyword_matrix = {} @@ -4087,17 +3082,14 @@ def keyword_analysis( } } - print(f"✅ ========== KEYWORD ANALYSIS COMPLETED ==========") - print(f"📊 Sonuç: {result['total_keywords']} common keywords, {result['total_rare_keywords']} rare keywords, {result['total_products_analyzed']} ürün") - print(f"📄 Sayfa {page}/{total_pages} - {len(result['keywords'])} keyword gösteriliyor, {len(result['rare_keywords'])} rare keyword") + log_keywords.info(f"Keyword analysis completed: {result['total_keywords']} common, {result['total_rare_keywords']} rare, {result['total_products_analyzed']} ürün") return result except Exception as e: import traceback error_trace = traceback.format_exc() - print(f"❌ Keyword analysis error: {str(e)}") - print(f"Traceback: {error_trace}") + log_keywords.error(f"Keyword analysis error: {e}", exc_info=True) return 
{"error": str(e), "traceback": error_trace, "note": "Failed to generate keyword analysis"} @@ -4149,15 +3141,14 @@ def product_finder( Returns: Filtrelenmiş ürün listesi """ - # print(f"🔍 ========== PRODUCT FINDER REQUEST ==========") - print(f"📋 Report ID: {report_id}, Page: {page}, Per Page: {per_page}") + log_api.info(f"Product finder: report={report_id}, page={page}, per_page={per_page}") try: pass # Load products all_products, categories_data = load_report_products(db, report_id) - print(f"✅ {len(all_products)} ürün yüklendi") - + log_api.info(f"{len(all_products)} ürün yüklendi") + if not all_products: return { "total_products": 0, @@ -4166,11 +3157,11 @@ def product_finder( "total_pages": 0, "products": [] } - + # Load social proof data social_data = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json") social_details = social_data.get("details", {}) if social_data else {} - print(f"✅ Social proof data yüklendi: {len(social_details)} ürün") + log_api.info(f"Social proof data yüklendi: {len(social_details)} ürün") # Create product lookup dict product_dict = {p.get("id"): p for p in all_products if p.get("id")} @@ -4225,13 +3216,7 @@ def product_finder( rating = float(rating_obj) # Get price - price = 0 - if product.get("price"): - price_obj = product.get("price") - if isinstance(price_obj, dict): - price = float(price_obj.get("sellingPrice", 0) or 0) - elif isinstance(price_obj, (int, float)): - price = float(price_obj) + price = _extract_price(product) # Get category category = product.get("category", {}) @@ -4362,7 +3347,7 @@ def product_finder( "barcode": product.get("barcode", "") }) - print(f"✅ Filtreleme sonrası: {len(filtered_products)} ürün kaldı") + log_api.info(f"Filtreleme sonrası: {len(filtered_products)} ürün kaldı") # Sort products reverse_order = sort_order == "desc" @@ -4386,7 +3371,7 @@ def product_finder( end_idx = start_idx + per_page paginated_products = filtered_products[start_idx:end_idx] - print(f"✅ Sayfalama: 
{len(paginated_products)} ürün gösteriliyor (sayfa {page}/{total_pages})") + log_api.info(f"Sayfalama: {len(paginated_products)} ürün (sayfa {page}/{total_pages})") return { "total_products": total_products, @@ -4399,8 +3384,7 @@ def product_finder( except Exception as e: import traceback error_trace = traceback.format_exc() - print(f"❌ Product finder error: {str(e)}") - print(f"Traceback: {error_trace}") + log_api.error(f"Product finder error: {e}", exc_info=True) return { "error": str(e), "total_products": 0, @@ -4615,6 +3599,17 @@ def _enrich_report_task(report_id: int): _save_json(f"{base_dir}/social.json", soc_payload) time.sleep(0.1) + # Enrichment bitti, konsolide dosya oluştur + enrichment_progress[report_id] = {"status": "running", "step": "consolidate", "done": 1, "total": 2} + try: + from data_consolidator import build_consolidated_report, save_consolidated_report + consolidated = build_consolidated_report(report_id, db, REPORTS_DIR, social_data=soc_payload) + if consolidated: + save_consolidated_report(report_id, consolidated, REPORTS_DIR) + log_api.info(f"Konsolide rapor oluşturuldu: report {report_id}") + except Exception as ce: + log_api.warning(f"Konsolidasyon hatası (enrichment devam eder): {ce}", exc_info=True) + # DISABLED: Questions, similar products, and followers removed per user request # # 3) Questions # enrichment_progress[report_id] = {"status": "running", "step": "questions", "done": 2, "total": 5} @@ -4634,6 +3629,14 @@ def _enrich_report_task(report_id: int): # _save_json(f"{base_dir}/followers.json", f_payload) # time.sleep(0.1) + # Invalidate dashboard cache so next request gets fresh data with social proof + cache_key = f"dashboard_{report_id}" + if isinstance(dashboard_cache, dict) and cache_key in dashboard_cache: + del dashboard_cache[cache_key] + elif hasattr(dashboard_cache, 'cache') and cache_key in dashboard_cache.cache: + del dashboard_cache.cache[cache_key] + log_api.info(f"Dashboard cache invalidated for report {report_id} 
after enrichment") + enrichment_progress[report_id] = {"status": "completed", "step": "done", "done": 2, "total": 2} except Exception as e: enrichment_progress[report_id] = {"status": "error", "error": str(e)} @@ -4650,7 +3653,8 @@ def start_enrichment(report_id: int, background: BackgroundTasks): @app.get("/api/reports/{report_id}/enrich/status") def enrichment_status(report_id: int): - return enrichment_progress.get(report_id, {"status": "unknown"}) + result = enrichment_progress.get(report_id) + return result if result is not None else {"status": "unknown"} # ============================================================================ @@ -4663,8 +3667,8 @@ def get_hidden_champions( min_rating: float = 4.5, max_review_count: int = 50, social_multiplier: float = 1.5, - min_score: int = 60, - min_orders: int = 1, # Minimum satış sayısı (satış verisi çok önemli) + min_score: int = 30, + min_orders: int = 0, # Minimum satış sayısı (0 = sosyal veri yoksa da göster) limit: int = 50, db: Session = Depends(get_db) ): @@ -4772,11 +3776,7 @@ def test_analytics(report_id: int, db: Session = Depends(get_db)): avg_rating = sum(ratings) / len(ratings) if ratings else 0 # Ortalama fiyat - prices = [ - p.get("price", {}).get("sellingPrice", 0) - for p in all_products - if p.get("price", {}).get("sellingPrice", 0) > 0 - ] + prices = [_extract_price(p) for p in all_products if _extract_price(p) > 0] avg_price = sum(prices) / len(prices) if prices else 0 # 8. 
HHI yorumu ve stratejik tavsiye @@ -4964,6 +3964,34 @@ async def test_google_trends(product_name: str = "iPhone 15"): raise HTTPException(status_code=500, detail=str(e)) +# --------------------------------------------------------------------------- +# Periodic resource logger (runs every 60s in background) +# --------------------------------------------------------------------------- +_resource_logger = get_logger("resources") + +async def _periodic_resource_log(): + """Log cache sizes and circuit breaker state every 60 seconds.""" + while True: + await asyncio.sleep(60) + try: + cb_status = _social_proof_breaker.get_status() + _resource_logger.info( + "Resource snapshot", + extra={ + "cache_size": len(dashboard_cache) if isinstance(dashboard_cache, dict) else len(dashboard_cache.cache), + "cb_state": cb_status["status"], + "failures": cb_status["failures"], + }, + ) + except Exception: + pass # Never crash the background task + +@app.on_event("startup") +async def _start_resource_logger(): + asyncio.create_task(_periodic_resource_log()) + _resource_logger.info("Periodic resource logger started (60s interval)") + + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8001) diff --git a/backend/scraper.py b/backend/scraper.py index a25257e..3e0427c 100644 --- a/backend/scraper.py +++ b/backend/scraper.py @@ -10,6 +10,9 @@ import math import os from typing import Dict, List, Any, Optional from datetime import datetime +from logging_config import get_logger + +log = get_logger("scraper") class TrendyolScraper: @@ -55,7 +58,7 @@ class TrendyolScraper: response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: - print(f"❌ Sayfa {page} error: {e}") + log.warning(f"Sayfa {page} error: {e}") return None def get_total_count(self) -> int: @@ -96,7 +99,7 @@ class TrendyolScraper: # Sayfa sayısını hesapla total_pages = self.calculate_total_pages(total_count, max_pages) - print(f"📦 Kategori {self.category_id}: 
{total_count} ürün, {total_pages} sayfa çekilecek") + log.info(f"Kategori {self.category_id}: {total_count} ürün, {total_pages} sayfa çekilecek") # Sayfaları çek all_products = [] @@ -105,7 +108,7 @@ class TrendyolScraper: data = self.fetch_page(page) if not data or not data.get('isSuccess'): - print(f"⚠️ Sayfa {page} atlandı") + log.warning(f"Sayfa {page} atlandı") continue products = data.get('products', []) @@ -144,7 +147,7 @@ class TrendyolScraper: return True except Exception as e: - print(f"❌ Dosya kaydetme hatası: {e}") + log.error(f"Dosya kaydetme hatası: {e}") return False def get_category_info(self) -> Optional[Dict[str, Any]]: @@ -157,6 +160,112 @@ class TrendyolScraper: return data.get('categoryInfo', {}) +class TrendyolSearchScraper: + """Trendyol Search API ile ürün çeker — tüm kategori tipleri için çalışır (-c ve -s)""" + + API_BASE_URL = "https://apigw.trendyol.com/discovery-sfint-search-service/api/search/products" + + def __init__(self, path_model: str, page_size: int = 24): + self.path_model = path_model + self.page_size = page_size + self.headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + "Accept": "application/json", + "Referer": f"https://www.trendyol.com/{path_model}", + "Origin": "https://www.trendyol.com" + } + self.cookies = { + "storefrontId": "1", + "language": "tr", + "countryCode": "TR" + } + + def fetch_page(self, page: int) -> Optional[Dict[str, Any]]: + """Tek sayfa çeker""" + params = { + "pathModel": self.path_model, + "pi": page, + "ps": self.page_size, + "channelId": 1, + "storefrontId": 1, + "culture": "tr-TR" + } + try: + response = requests.get( + self.API_BASE_URL, + params=params, + headers=self.headers, + cookies=self.cookies, + timeout=15 + ) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + log.warning(f"Search API sayfa {page} error ({self.path_model}): {e}") + return None + + def fetch_all_products(self, delay: 
float = 1.0, max_pages: int = 10) -> List[Dict[str, Any]]: + """Tüm ürünleri çeker, normalize eder (max_pages=10 x page_size=24 = 240 ürün)""" + first = self.fetch_page(1) + if not first: + return [] + + total = first.get("total", 0) or first.get("totalCount", 0) or first.get("roughTotal", 0) + raw_products = first.get("products", []) + + if total == 0 and not raw_products: + return [] + + # total 0 olsa bile ürün varsa en az 1 sayfa çek + if total == 0 and raw_products: + total = len(raw_products) + + total_pages = min(math.ceil(total / self.page_size), max_pages) + log.info(f"Search API {self.path_model}: {total} ürün, {total_pages} sayfa çekilecek") + + for page in range(2, total_pages + 1): + data = self.fetch_page(page) + if data and data.get("products"): + raw_products.extend(data["products"]) + if page < total_pages: + time.sleep(delay) + + return [_normalize_search_product(p) for p in raw_products] + + +def _normalize_search_product(raw: dict) -> dict: + """Search API ürün formatını mevcut sisteme uyumlu hale getir""" + brand = raw.get("brand", {}) + if isinstance(brand, str): + brand = {"name": brand} + + price = raw.get("price", {}) + if isinstance(price, (int, float)): + price = {"sellingPrice": price, "originalPrice": price} + elif isinstance(price, dict) and "sellingPrice" not in price: + # Search API returns current/discountedPrice/originalPrice — map to sellingPrice + price["sellingPrice"] = price.get("discountedPrice") or price.get("current") or price.get("originalPrice") or price.get("old") or 0 + + rating = raw.get("ratingScore", {}) + if rating is None: + rating = {} + + return { + "id": raw.get("id") or raw.get("contentId"), + "name": raw.get("name", ""), + "brand": brand, + "price": price, + "ratingScore": rating, + "url": raw.get("url", ""), + "imageUrl": raw.get("image", raw.get("imageUrl", "")), + "merchantListings": raw.get("merchantListings", []), + "winnerVariant": raw.get("winnerVariant", {}), + "socialProofs": raw.get("socialProofs", 
[]), + "categoryId": raw.get("categoryId"), + "categoryName": raw.get("categoryName"), + } + + def scrape_category(category_id: int, category_name: str, output_dir: str = "../categories") -> Dict[str, Any]: """ Tek bir kategoriyi çeker @@ -227,9 +336,7 @@ def scrape_multiple_categories(categories: List[tuple], delay: float = 2.0) -> D } for i, (cat_id, cat_name) in enumerate(categories, 1): - print(f"\n{'='*80}") - print(f"📂 [{i}/{len(categories)}] {cat_name} (ID: {cat_id})") - print('='*80) + log.info(f"[{i}/{len(categories)}] {cat_name} (ID: {cat_id})") result = scrape_category(cat_id, cat_name) results["details"].append(result) @@ -237,10 +344,10 @@ def scrape_multiple_categories(categories: List[tuple], delay: float = 2.0) -> D if result["success"]: results["successful"] += 1 results["total_products"] += result["total_products"] - print(f"✅ Başarılı: {result['total_products']} ürün") + log.info(f"Başarılı: {result['total_products']} ürün") else: results["failed"] += 1 - print(f"❌ Hata: {result['error']}") + log.error(f"Hata: {result['error']}") # Kategoriler arası bekleme if i < len(categories):