diff --git a/CLAUDE.md b/CLAUDE.md
index e72ed78..1c7294a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,12 +1,12 @@
# CLAUDE.md
-Bu dosya Claude Code (claude.ai/code) için proje rehberidir.
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Proje Özeti
-**Trendyol Product Dashboard**: Trendyol e-ticaret platformu için kategori bazlı ürün analiz sistemi. 7 tab'lı dashboard, otomatik rapor oluşturma ve sosyal kanıt metrikleri.
+**Trendyol Product Dashboard**: Trendyol e-ticaret platformu için kategori bazlı ürün analiz sistemi. 9 tab'lı dashboard, otomatik rapor oluşturma, sosyal kanıt metrikleri ve hidden champion analizi.
-**Stack**: FastAPI + React 19 + Vite + SQLite + Tailwind CSS
+**Stack**: FastAPI + React 19 + Vite + PostgreSQL + Tailwind CSS
## Geliştirme Komutları
@@ -15,17 +15,33 @@ Bu dosya Claude Code (claude.ai/code) için proje rehberidir.
python3 start.py
# Manuel başlatma (iki terminal)
-cd backend && python3 main.py # Terminal 1 - Backend
-cd admin-panel && npm run dev # Terminal 2 - Frontend
+cd backend && python3 main.py # Terminal 1 - Backend (port 8001)
+cd admin-panel && npm run dev # Terminal 2 - Frontend (port 5173)
# Dependency kurulumu
cd backend && pip install -r requirements.txt # Python
cd admin-panel && npm install # Node.js
-# Diğer komutlar
-cd admin-panel && npm run build # Frontend build
-cd admin-panel && npm run lint # Lint
-cd backend && python3 -c "from database import init_db; init_db()" # DB init
+# Build & lint
+cd admin-panel && npm run build # Frontend production build
+cd admin-panel && npm run lint # ESLint
+
+# Backend testler
+cd backend && pytest # Tüm testler
+cd backend && pytest tests/test_cache.py # Tek test dosyası
+cd backend && pytest tests/test_cache.py -k "test_ttl" # Tek test
+
+# Frontend E2E testler (Playwright)
+cd admin-panel && npx playwright test # Tüm E2E testler
+cd admin-panel && npx playwright test tests/rare-keywords.spec.js # Tek spec
+
+# Docker ile çalıştırma
+./build-docker.sh && ./start-docker.sh # Build + start
+./stop-docker.sh # Durdur
+
+# DB migration
+cd backend && alembic upgrade head # Migration uygula
+cd backend && alembic revision --autogenerate -m "description" # Yeni migration
```
**Erişim URL'leri**:
@@ -39,23 +55,36 @@ cd backend && python3 -c "from database import init_db; init_db()" # DB init
### 3 Katmanlı Yapı
```
-React Frontend (admin-panel/) → FastAPI Backend (backend/) → SQLite + JSON
-├── CategoryManagement.jsx ├── main.py (~4400 satır) ├── trendyol.db
-├── ReportGeneration.jsx ├── database.py ├── categories/*.json
-├── ReportList.jsx └── scraper.py └── reports/*.json
-└── ReportDashboard.jsx (7 tab)
+React Frontend (admin-panel/) → FastAPI Backend (backend/) → PostgreSQL + JSON
+├── ReportDashboard.jsx (9 tab) ├── main.py (~5000 satır) ├── trendyol_db
+├── ReportGeneration.jsx ├── database.py (ORM) ├── categories/*.json
+├── ReportList.jsx ├── scraper.py └── reports/*.json
+├── ReportComparison.jsx ├── google_trends_helper.py
+└── CategoryManagement.jsx └── analytics/
+ ├── metrics.py
+ └── champion_finder.py
```
-### Dashboard Tab'ları (7 adet)
+### Frontend Routes
+| Path | Component | Açıklama |
+|------|-----------|----------|
+| `/` veya `/report` | ReportGeneration | Yeni rapor oluştur |
+| `/reports` | ReportList | Kayıtlı raporlar |
+| `/reports/:reportId` | ReportDashboard | 9 tab'lı analiz dashboard |
+| `/compare` | ReportComparison | Yan yana rapor karşılaştırma |
+
+### Dashboard Tab'ları (9 adet)
| Tab ID | Tab Adı | Component | Açıklama |
|--------|---------|-----------|----------|
| overview | Genel Bakış | OverviewTab | KPI'lar, özet grafikler |
| brand | Marka | BrandTab | Marka analizi, pazar payı |
| category | Kategori | CategoryTab | Kategori dağılımı |
| origin | Menşei | OriginTab | Ülke bazlı analiz |
-| barcode | Barkod | BarcodeTab | Barkod veri analizi |
-| keyword | Keyword Aracı | KeywordTab | Anahtar kelime analizi |
+| barcode | Barkod | BarcodeTab | Barkod/GS1 menşei analizi |
+| keyword | Keyword Aracı | KeywordTab | Anahtar kelime + Google Trends |
| product-finder | Ürün Bulma | ProductFinderTab | Ürün arama/filtreleme |
+| hidden-champions | Gizli Şampiyonlar | HiddenChampionsTab | Düşük yorum, yüksek puan fırsatları |
+| opportunity | Fırsat Analizi | OpportunityTab | Pazar fırsat analizi |
### Veri Akışı
@@ -77,12 +106,12 @@ React Frontend (admin-panel/) → FastAPI Backend (backend/) → SQLite +
**Backend'den gelen hazır objeleri kullan, ham hesaplama YAPMA:**
```jsx
-// ✅ DOĞRU - Hazır veriyi kullan
+// DOĞRU - Hazır veriyi kullan
const kpis = dashboardData?.kpis || {};
const topProducts = dashboardData?.charts?.top_products || [];
const topBrands = dashboardData?.charts?.top_brands || [];
-// ❌ YANLIŞ - all_products'tan hesaplama yapma
+// YANLIŞ - all_products'tan hesaplama yapma
const total = dashboardData?.all_products.reduce((sum, p) => sum + p.price, 0);
```
@@ -97,12 +126,11 @@ Frontend hesaplamalı veri, alan adı uyumsuzluğuna yol açabilir. Detay için:
**Çözüm Pattern - Mapping Layer**:
```jsx
-// Veriyi component beklentilerine dönüştür
const transformed = sourceData.map(item => ({
- country: item.name, // Beklenen alana map'le
- name: item.name, // Orijinali koru
- count: item.productCount, // Beklenen alana map'le
- productCount: item.productCount // Orijinali koru
+ country: item.name,
+ name: item.name,
+ count: item.productCount,
+ productCount: item.productCount
}));
```
@@ -111,7 +139,7 @@ const transformed = sourceData.map(item => ({
1. Tab config'i `src/constants/tabGroups.js`'e ekle
2. Tab component'ini `src/components/dashboard-tabs/` altına oluştur
3. `ReportDashboard.jsx`'te import et ve render bloğu ekle
-4. **Her zaman veri dönüşümü için console.log ekle**
+4. Gerekiyorsa backend'e yeni endpoint ekle (`main.py`)
## API Entegrasyonu
@@ -123,15 +151,10 @@ const transformed = sourceData.map(item => ({
| ENRICHMENT | 120s | Sosyal kanıt zenginleştirme |
| KEYWORD_ANALYSIS | 300s | Keyword analizi |
-### Polling Pattern
-```jsx
-// Exponential backoff with jitter (1s → 5s max)
-import { fetchWithTimeout, API_BASE_URL } from '../config/api';
-```
-
-### Rate Limit
-- Sosyal kanıt API: 2 istek/saniye
-- Exponential backoff kullanılır (%75 istek azaltımı sağlandı)
+### Rate Limit & Resilience
+- Sosyal kanıt API: 2 istek/saniye (RateLimiter)
+- Harici API çağrıları için circuit breaker pattern'i
+- Exponential backoff with jitter (1s → 5s max)
## Kod Değişiklik Kuralları
@@ -141,18 +164,45 @@ import { fetchWithTimeout, API_BASE_URL } from '../config/api';
- Uzun işlemler: BackgroundTasks + progress polling endpoint
- Harici API çağrıları: Her zaman timeout parametresi ekle
- Cache: BoundedCache kullan (asla sınırsız dict kullanma)
+- Analytics hesaplamaları: `analytics/` modülüne koy (metrics.py, champion_finder.py)
### Frontend
- `fetchWithTimeout` kullan (`src/config/api.js`'den)
- Async işlemler için loading state göster
- Eşzamanlı çağrılar için request deduplication uygula
+- Grafikler: Recharts kullan, veri dönüşümü `utils/chartTransformers.js`'de
+- Export: `utils/exportUtils.js` ile CSV/Excel
### CORS Değişiklikleri
-Yeni frontend portları için `main.py`'deki CORS allowlist'e ekle (satır 34-45):
+Yeni frontend portları için `main.py`'deki CORS allowlist'e ekle:
```python
allow_origins=["http://localhost:5173", "http://localhost:5174", ...]
```
+## Database
+
+**Dev**: `postgresql://postgres:trendyol123@localhost:5433/trendyol_db`
+**Docker**: `postgresql://postgres:trendyol123@postgres:5432/trendyol_db`
+
+Migrations: Alembic (`backend/alembic/`). Her schema değişikliğinde `alembic revision --autogenerate` çalıştır.
+
+| Model | Amaç | Anahtar Alanlar |
+|-------|------|-----------------|
+| Category | Hiyerarşik kategori ağacı | `parent_id` (self-ref), `trendyol_category_id` |
+| Snapshot | Aylık veri görüntüleri | `category_id`, `json_file_path` |
+| Report | Kayıtlı raporlar | `category_id`, `json_file_path` |
+| EnrichmentError | API hata logları | `endpoint`, `error_type`, `status_code` |
+
+## Deployment
+
+**Platform**: Coolify + Docker Compose + Traefik reverse proxy
+
+Docker Compose servisleri: `postgres` (15-alpine), `backend` (FastAPI), `frontend` (Nginx)
+
+`startup.sh` sırası: PostgreSQL bağlantı bekle → Alembic migration → Kategori seeding → Uvicorn başlat
+
+Traefik SSE streaming desteği: 100ms flush interval (rapor progress için)
+
## Kaynak Limitleri
| Kaynak | Limit |
@@ -163,26 +213,11 @@ allow_origins=["http://localhost:5173", "http://localhost:5174", ...]
| Sosyal kanıt batch | 5 ürün/istek |
| Rate limit | 2 istek/saniye (sosyal kanıt) |
-## Kritik Dependency'ler
-
-**Backend**: FastAPI 0.104.1, SQLAlchemy 2.0.45, Uvicorn 0.24.0, Requests 2.31.0, Pytrends 4.9.2
-
-**Frontend**: React 19.2.0, Vite 7.2.2, Recharts 3.4.1, Tailwind CSS 4.1.17, Axios 1.13.2
-
-## Database Modelleri
-
-| Model | Amaç | Anahtar Alanlar |
-|-------|------|-----------------|
-| Category | Hiyerarşik kategori ağacı | `parent_id` (self-ref), `trendyol_category_id` |
-| Snapshot | Aylık veri görüntüleri | `category_id`, `json_file_path` |
-| Report | Kayıtlı raporlar | `category_id`, `json_file_path` |
-| EnrichmentError | API hata logları | `endpoint`, `error_type`, `status_code` |
-
## Dokümantasyon
| Dosya | Amaç |
|-------|------|
-| docs/DASHBOARD_ARCHITECTURE.md | **Önemli** - Dashboard veri yapıları |
+| docs/DASHBOARD_ARCHITECTURE.md | Dashboard veri yapıları ve KPI tanımları |
| docs/bug-fixes/ORIGINTAB_BUG_FIX.md | **Kritik** - Alan adı uyumsuzluk pattern'i |
| docs/API_DOCUMENTATION.md | Tam API referansı |
| docs/ARCHITECTURE.md | Sistem mimarisi (Türkçe) |
diff --git a/admin-panel/src/components/ReportDashboard.jsx b/admin-panel/src/components/ReportDashboard.jsx
index 3ec35b8..420812c 100644
--- a/admin-panel/src/components/ReportDashboard.jsx
+++ b/admin-panel/src/components/ReportDashboard.jsx
@@ -99,17 +99,27 @@ function ReportDashboard() {
const products = dashboardData.all_products
const totalProducts = products.length
- const totalOrders = products.reduce((sum, p) => sum + (p.orders || 0), 0)
+ const rawOrders = products.reduce((sum, p) => sum + (p.orders || 0), 0)
+ const totalBaskets = products.reduce((sum, p) => sum + (p.baskets || 0), 0)
+ // Trendyol API artık order-count döndürmüyor — orders > 0 ise onu, yoksa baskets'ı kullan
+ const totalOrders = rawOrders > 0 ? rawOrders : totalBaskets
+ const ordersLabel = rawOrders > 0 ? 'orders' : 'baskets'
const totalViews = products.reduce((sum, p) => sum + (p.page_views || 0), 0)
+ const totalFavorites = products.reduce((sum, p) => sum + (p.favorites || 0), 0)
const avgPrice = products.reduce((sum, p) => sum + (p.price || 0), 0) / totalProducts
- const totalRevenue = products.reduce((sum, p) => sum + ((p.price || 0) * (p.orders || 0)), 0)
+ const totalRevenue = rawOrders > 0
+ ? products.reduce((sum, p) => sum + ((p.price || 0) * (p.orders || 0)), 0)
+ : products.reduce((sum, p) => sum + ((p.price || 0) * (p.baskets || 0)), 0)
const kpis = {
totalProducts,
totalOrders,
+ totalBaskets,
totalViews,
+ totalFavorites,
avgPrice: Math.round(avgPrice),
- totalRevenue: Math.round(totalRevenue)
+ totalRevenue: Math.round(totalRevenue),
+ ordersLabel
}
console.log('✅ [KPI] Calculated KPIs:', kpis)
diff --git a/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx b/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx
index dabf390..3931b5a 100644
--- a/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx
+++ b/admin-panel/src/components/dashboard-tabs/HiddenChampionsTab.jsx
@@ -12,8 +12,8 @@ export default function HiddenChampionsTab({ reportId }) {
// Filters
const [minRating, setMinRating] = useState(4.0)
const [maxReview, setMaxReview] = useState(100)
- const [minOrders, setMinOrders] = useState(5)
- const [sortKey, setSortKey] = useState('performance_score')
+ const [minOrders, setMinOrders] = useState(0)
+ const [sortKey, setSortKey] = useState('hidden_champion_score')
const [sortDir, setSortDir] = useState('desc')
const [showFilters, setShowFilters] = useState(false)
@@ -41,9 +41,9 @@ export default function HiddenChampionsTab({ reportId }) {
// Filtered & sorted products
const filteredProducts = useMemo(() => {
- if (!data?.products) return []
+ if (!data?.hidden_champions) return []
- return data.products
+ return data.hidden_champions
.filter(p => {
const rating = p.rating || 0
const reviewCount = p.review_count || p.reviewCount || 0
@@ -230,10 +230,10 @@ export default function HiddenChampionsTab({ reportId }) {
handleSort('performance_score')}
+ onClick={() => handleSort('hidden_champion_score')}
>
- Skor
+ Skor
|
@@ -287,13 +287,13 @@ export default function HiddenChampionsTab({ reportId }) {
= 70
+ (product.hidden_champion_score || 0) >= 70
? 'bg-emerald-100 text-emerald-700'
- : (product.performance_score || 0) >= 40
+ : (product.hidden_champion_score || 0) >= 40
? 'bg-amber-100 text-amber-700'
: 'bg-slate-100 text-slate-600'
}`}>
- {(product.performance_score || 0).toFixed(0)}
+ {(product.hidden_champion_score || 0).toFixed(0)}
|
diff --git a/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx b/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx
index 3caf033..4662280 100644
--- a/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx
+++ b/admin-panel/src/components/dashboard-tabs/OverviewTab.jsx
@@ -90,21 +90,21 @@ export default function OverviewTab({
? (sortedPrices[sortedPrices.length / 2 - 1] + sortedPrices[sortedPrices.length / 2]) / 2
: sortedPrices[Math.floor(sortedPrices.length / 2)]
- const bucketCount = 10
- const range = max - min || 1
- const bucketSize = range / bucketCount
+ // Use predefined price ranges for meaningful distribution
+ const ranges = [
+ [0, 50], [50, 100], [100, 200], [200, 500],
+ [500, 1000], [1000, 2000], [2000, 5000], [5000, 10000], [10000, Infinity]
+ ]
- const buckets = Array.from({ length: bucketCount }, (_, i) => ({
- range: `₺${Math.round(min + i * bucketSize)}-${Math.round(min + (i + 1) * bucketSize)}`,
- min: min + i * bucketSize,
- max: min + (i + 1) * bucketSize,
- count: 0
- }))
-
- prices.forEach(price => {
- const idx = Math.min(Math.floor((price - min) / bucketSize), bucketCount - 1)
- buckets[idx].count++
- })
+ // Filter out empty ranges and build buckets
+ const buckets = ranges
+ .map(([lo, hi]) => ({
+ range: hi === Infinity ? `₺${lo.toLocaleString('tr-TR')}+` : `₺${lo.toLocaleString('tr-TR')}-${hi.toLocaleString('tr-TR')}`,
+ min: lo,
+ max: hi,
+ count: prices.filter(p => p >= lo && (hi === Infinity ? true : p < hi)).length
+ }))
+ .filter(b => b.count > 0)
return { buckets, mean: Math.round(mean), median: Math.round(median) }
}, [allProducts])
@@ -186,7 +186,7 @@ export default function OverviewTab({
color="blue"
/>
b.min <= priceDistribution.mean && b.max > priceDistribution.mean)}
+ x={(priceDistribution.buckets.find(b => b.min <= priceDistribution.mean && (b.max === Infinity || b.max > priceDistribution.mean)) || {}).range}
stroke="#f97316"
strokeDasharray="5 5"
- label={{ value: `Ort: ₺${priceDistribution.mean}`, fill: '#f97316', fontSize: 11, position: 'top' }}
+ label={{ value: `Ort: ₺${priceDistribution.mean.toLocaleString('tr-TR')}`, fill: '#f97316', fontSize: 11, position: 'top' }}
/>
diff --git a/backend/Dockerfile b/backend/Dockerfile
index d189c96..a297164 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -30,7 +30,7 @@ COPY backend/ .
COPY categories/ /data/initial-categories/
# Create data directories with proper permissions
-RUN mkdir -p /data/categories /data/reports && \
+RUN mkdir -p /data/categories /data/reports /data/logs && \
chmod -R 755 /data
# Make startup script executable (before switching to non-root user)
diff --git a/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py b/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py
new file mode 100644
index 0000000..7507d78
--- /dev/null
+++ b/backend/alembic/versions/38207dbbac44_add_path_model_to_categories.py
@@ -0,0 +1,30 @@
+"""add path_model to categories
+
+Revision ID: 38207dbbac44
+Revises: 001
+Create Date: 2026-03-28 14:56:06.784769
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '38207dbbac44'
+down_revision: Union[str, None] = '001'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add the nullable ``path_model`` string column to the ``categories`` table."""
+    path_model_column = sa.Column('path_model', sa.String(), nullable=True)
+    op.add_column('categories', path_model_column)
+
+
+def downgrade() -> None:
+    """Drop the ``path_model`` column from the ``categories`` table."""
+    op.drop_column(table_name='categories', column_name='path_model')
diff --git a/backend/analytics/champion_finder.py b/backend/analytics/champion_finder.py
index fe88244..6a144f5 100644
--- a/backend/analytics/champion_finder.py
+++ b/backend/analytics/champion_finder.py
@@ -16,6 +16,51 @@ class HiddenChampionFinder:
Gizli şampiyonları bulan sınıf
Parçalı pazarlarda (düşük HHI) özelleştirilmiş filtreler kullanır
"""
+
+    @staticmethod
+    def _parse_social_proof_value(value_str: str) -> int:
+        """Convert a social-proof display value ('3k', '248k', '1.2k', '866') to an int.
+
+        A trailing ``k``/``m`` multiplies by 1,000/1,000,000, and the number in
+        front of it may carry a decimal part written with ``.`` or ``,``
+        ('1.2k' -> 1200). Without a suffix, dots are treated as thousands
+        separators ('1.234' -> 1234). Numeric inputs are truncated to int.
+        Empty, ``None`` or unparseable input yields 0.
+        """
+        if not value_str:
+            return 0
+        if isinstance(value_str, (int, float)):
+            # Already numeric: do not round-trip through str(), which would turn
+            # 12.5 into "125" once dots are stripped below.
+            try:
+                return int(value_str)
+            except (ValueError, OverflowError):
+                return 0
+        text = str(value_str).strip().lower()
+        for suffix, multiplier in (("k", 1000), ("m", 1000000)):
+            if text.endswith(suffix):
+                # Keep the decimal separator here: removing it first would
+                # inflate '1.2k' to 12000.
+                number_part = text[:-1].replace(",", ".")
+                try:
+                    # round() guards against float error (2.3 * 1000 == 2299.99...).
+                    return int(round(float(number_part) * multiplier))
+                except (ValueError, TypeError, OverflowError):
+                    return 0
+        try:
+            # Plain values: a dot can only be a thousands separator.
+            return int(text.replace(".", ""))
+        except (ValueError, TypeError):
+            return 0
+
+    @staticmethod
+    def _extract_social_proofs(product: Dict) -> Dict[str, int]:
+        """Collect social-proof counters from the product's ``socialProofs`` list.
+
+        Returns a dict with the keys ``page_views``, ``orders``, ``baskets`` and
+        ``favorites``, each 0 when the product has no matching entry. Entries
+        whose ``type`` is not recognised are ignored.
+        """
+        metric_for_type = {
+            "pageViewCount": "page_views",
+            "orderCountL3D": "orders",
+            "orderCountL365D": "orders",
+            "basketCount": "baskets",
+            "favoriteCount": "favorites",
+        }
+        totals = dict.fromkeys(("page_views", "orders", "baskets", "favorites"), 0)
+        for entry in product.get("socialProofs", []) or ():
+            metric = metric_for_type.get(entry.get("type", ""))
+            if not metric:
+                continue
+            parsed = HiddenChampionFinder._parse_social_proof_value(entry.get("value", "0"))
+            # Several types feed one metric (orderCountL3D vs orderCountL365D):
+            # keep the largest value seen.
+            totals[metric] = max(totals[metric], parsed)
+        return totals
def find(
self,
@@ -98,10 +143,12 @@ class HiddenChampionFinder:
pid = str(product.get("id"))
social = social_details.get(pid, {})
- page_views = social.get("page_views", 0) or 0
- orders = social.get("orders", 0) or 0
- baskets = social.get("baskets", 0) or 0
- favorites = social.get("favorites", 0) or 0
+ # Önce enriched social data, sonra ürünün kendi socialProofs'u
+ embedded_social = self._extract_social_proofs(product)
+ page_views = social.get("page_views", 0) or embedded_social["page_views"] or 0
+ orders = social.get("orders", 0) or embedded_social["orders"] or product.get("orders", 0) or 0
+ baskets = social.get("baskets", 0) or embedded_social["baskets"] or 0
+ favorites = social.get("favorites", 0) or embedded_social["favorites"] or 0
conversion_rate = (orders / page_views * 100) if page_views > 0 else 0
@@ -139,16 +186,29 @@ class HiddenChampionFinder:
# Minimum Orders kontrolü (satış verisi çok önemli)
min_orders = filters.get("min_orders", 1) # Varsayılan: en az 1 satış
+ # Sosyal veri var mı kontrol et
+ has_social = pid in social_details and page_views > 0
+
# Özelleştirilmiş Filtreleme (daha esnek)
- passes_filter = (
- rating >= filters.get("min_rating", 4.6) and
- review_count < filters.get("max_review_count", 30) and
- review_count >= 1 and # En az 1 yorum olmalı
- orders >= min_orders and # EN AZ 1 SATIŞ OLMALI (satış verisi çok önemli)
- (page_views >= threshold_views or page_views >= min_views_threshold) and # Kategori ortalamasının üzerinde VEYA minimum threshold
- (baskets >= threshold_baskets or baskets >= min_baskets_threshold) and # Sepet de kategori ortalamasının üzerinde VEYA minimum
- (conversion_rate >= 1.0 or page_views >= 500) # Minimum %1 conversion VEYA yüksek görüntülenme
- )
+ if has_social:
+ # Sosyal verisi olan ürünler: tam filtre
+ passes_filter = (
+ rating >= filters.get("min_rating", 4.6) and
+ review_count < filters.get("max_review_count", 30) and
+ review_count >= 1 and
+ orders >= min_orders and
+ (page_views >= threshold_views or page_views >= min_views_threshold) and
+ (baskets >= threshold_baskets or baskets >= min_baskets_threshold) and
+ (conversion_rate >= 1.0 or page_views >= 500)
+ )
+ else:
+ # Sosyal verisi olmayan ürünler: sadece rating + review + orders filtresi
+ passes_filter = (
+ rating >= filters.get("min_rating", 4.6) and
+ review_count < filters.get("max_review_count", 30) and
+ review_count >= 1 and
+ orders >= min_orders
+ )
if passes_filter:
# Potential score hesapla
@@ -196,7 +256,7 @@ class HiddenChampionFinder:
"category": category_name,
"rating": round(rating, 2),
"review_count": review_count,
- "price": product.get("price", {}).get("sellingPrice", 0),
+ "price": (product.get("price", {}).get("sellingPrice", 0) or product.get("price", {}).get("discountedPrice", 0) or product.get("price", {}).get("current", 0)) if isinstance(product.get("price"), dict) else (product.get("price", 0) or 0),
"page_views": page_views,
"orders": orders,
"baskets": baskets,
diff --git a/backend/analytics/metrics.py b/backend/analytics/metrics.py
index eac9275..923ead4 100644
--- a/backend/analytics/metrics.py
+++ b/backend/analytics/metrics.py
@@ -245,7 +245,13 @@ def get_rating_value(product: Dict) -> float:
rating = product.get("rating", 0)
if isinstance(rating, dict):
return rating.get("averageRating", 0) or 0
- return float(rating) if rating else 0
+ if rating:
+ return float(rating)
+ # Fallback: ratingScore nested object
+ rating_score = product.get("ratingScore", {})
+ if isinstance(rating_score, dict):
+ return float(rating_score.get("averageRating", 0) or 0)
+ return 0
def get_review_count(product: Dict) -> int:
@@ -263,6 +269,11 @@ def get_review_count(product: Dict) -> int:
rating = product.get("rating", {})
if isinstance(rating, dict):
review_count = rating.get("totalComments", 0) or rating.get("totalCount", 0) or 0
+ if not review_count:
+ # Fallback: ratingScore nested object
+ rating_score = product.get("ratingScore", {})
+ if isinstance(rating_score, dict):
+ review_count = rating_score.get("totalCount", 0) or 0
return int(review_count) if review_count else 0
diff --git a/backend/category_seeder.py b/backend/category_seeder.py
new file mode 100644
index 0000000..b1ef7af
--- /dev/null
+++ b/backend/category_seeder.py
@@ -0,0 +1,143 @@
+"""
+Category Seeder - Trendyol categories JSON'dan DB'ye aktarma
+Kaynak: ~/Desktop/trendyol_categories.json (varsayılan yol; bkz. DEFAULT_JSON_PATH)
+3 seviye hiyerarşi: Segment (Kadın) → Grup (Giyim) → Yaprak (Elbise)
+"""
+import json
+import re
+import os
+from database import SessionLocal, Category, Snapshot, Report, EnrichmentError
+from logging_config import get_logger
+
+log = get_logger("seeder")
+
+DEFAULT_JSON_PATH = os.path.expanduser("~/Desktop/trendyol_categories.json")
+
+
+def parse_url(url: str) -> dict:
+    """Derive ``path_model`` and ``trendyol_category_id`` from a category URL path.
+
+    ``path_model`` is the URL with its leading slashes removed. The category id
+    is the number of a trailing ``-c<digits>`` suffix, or ``None`` without one.
+
+    Examples:
+        /elbise-x-c56          -> path_model="elbise-x-c56", category_id=56
+        /kanvas-canta-y-s20972 -> path_model="kanvas-canta-y-s20972", category_id=None
+        /kadin-giyim-x-g1-c82  -> path_model="kadin-giyim-x-g1-c82", category_id=82
+    """
+    slug = url.lstrip("/")
+
+    # Only a "-c<id>" at the very end of the slug counts as a category id.
+    id_match = re.search(r"-c(\d+)$", slug)
+    if id_match is None:
+        category_id = None
+    else:
+        category_id = int(id_match.group(1))
+
+    return dict(path_model=slug, trendyol_category_id=category_id)
+
+
+def _new_category(name: str, url, parent_id):
+    """Build an unsaved, active ``Category`` for one entry of the categories JSON.
+
+    ``url`` is the site-relative path from the JSON and may be empty or ``None``.
+    When present it is parsed with ``parse_url`` and prefixed with the Trendyol
+    host; otherwise the URL-derived fields are all ``None``.
+    """
+    parsed = parse_url(url) if url else {"path_model": None, "trendyol_category_id": None}
+    return Category(
+        name=name,
+        parent_id=parent_id,
+        trendyol_category_id=parsed["trendyol_category_id"],
+        trendyol_url=f"https://www.trendyol.com{url}" if url else None,
+        path_model=parsed["path_model"],
+        is_active=True,
+    )
+
+
+def seed_from_json(json_path: str = None, clear_existing: bool = True) -> dict:
+    """Read the categories JSON file and write its hierarchy to the database.
+
+    The JSON maps each segment name to a list of group items. A group item with
+    ``children`` becomes a group with one leaf per child; a group item without
+    children is stored directly under the segment and counted as a leaf.
+
+    Args:
+        json_path: File to read; falls back to ``DEFAULT_JSON_PATH`` when falsy.
+        clear_existing: When true (the default), deletes ALL EnrichmentError,
+            Report, Snapshot and Category rows and commits before seeding.
+
+    Returns:
+        {"segments": int, "groups": int, "leaves": int, "total": int}
+
+    Raises:
+        Any exception raised while seeding is logged and re-raised after the
+        session has been rolled back.
+    """
+    json_path = json_path or DEFAULT_JSON_PATH
+
+    with open(json_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    db = SessionLocal()
+    try:
+        if clear_existing:
+            # Tables that reference categories are emptied first because of
+            # their FK constraints.
+            db.query(EnrichmentError).delete(synchronize_session=False)
+            db.query(Report).delete(synchronize_session=False)
+            db.query(Snapshot).delete(synchronize_session=False)
+            # Categories that have a parent go before the remaining (root) rows.
+            db.query(Category).filter(Category.parent_id != None).delete(synchronize_session=False)  # noqa: E711
+            db.query(Category).delete(synchronize_session=False)
+            db.commit()
+            log.info("Mevcut kategoriler ve bağlı veriler silindi")
+
+        stats = {"segments": 0, "groups": 0, "leaves": 0, "total": 0}
+
+        for segment_name, groups in data.items():
+            # Level 1: segment; it has no URL-derived fields.
+            segment = _new_category(segment_name, None, None)
+            db.add(segment)
+            db.flush()  # populate segment.id for the children below
+            stats["segments"] += 1
+
+            for group_item in groups:
+                group_name = group_item["name"]
+                group_url = group_item.get("url", "")
+                children = group_item.get("children", [])
+
+                if not children:
+                    # No children: this "group" is itself a leaf under the segment.
+                    db.add(_new_category(group_name, group_url, segment.id))
+                    stats["leaves"] += 1
+                    continue
+
+                # Level 2: group.
+                group = _new_category(group_name, group_url, segment.id)
+                db.add(group)
+                db.flush()  # populate group.id for its leaves
+                stats["groups"] += 1
+
+                # Level 3: leaves.
+                for leaf_item in children:
+                    db.add(_new_category(leaf_item["name"], leaf_item.get("url", ""), group.id))
+                    stats["leaves"] += 1
+
+        stats["total"] = stats["segments"] + stats["groups"] + stats["leaves"]
+        db.commit()
+        log.info(f"Seed tamamlandı: {stats}")
+        return stats
+
+    except Exception as e:
+        db.rollback()
+        log.error(f"Seed hatası: {e}")
+        raise
+    finally:
+        db.close()
+
+
+if __name__ == "__main__":
+    # Script entry point: seed with the default arguments and print the counts.
+    print(f"Seed tamamlandı: {seed_from_json()}")
diff --git a/backend/data_consolidator.py b/backend/data_consolidator.py
new file mode 100644
index 0000000..daa259f
--- /dev/null
+++ b/backend/data_consolidator.py
@@ -0,0 +1,791 @@
+"""
+Data Consolidator — tek birleştirilmiş JSON oluşturma modülü.
+
+Scraping + enrichment bittiğinde tüm normalizasyon ve hesaplamayı yapar,
+sonucu reports/report_{id}_data.json olarak kaydeder.
+Dashboard endpoint sadece bu dosyayı okur.
+"""
+import json
+import os
+import re
+import time
+import random
+from collections import defaultdict
+from datetime import datetime
+
+import numpy as np
+
+from logging_config import get_logger
+
+log = get_logger("consolidator")
+
+# ─────────────────────────────────────────────────────────
+# Ülke kodu → tam isim mapping (menşei analizi için)
+# ─────────────────────────────────────────────────────────
# Upper-case two-letter country code -> Turkish display name.
# normalize_product() looks up the upper-cased "origin" custom value here and
# falls back to "Diğer (<code>)" for codes that are not listed.
COUNTRY_NAMES = {
    "TR": "Türkiye", "CN": "Çin", "US": "Amerika", "GB": "İngiltere",
    "FR": "Fransa", "DE": "Almanya", "IT": "İtalya", "ES": "İspanya",
    "KR": "Güney Kore", "JP": "Japonya", "IN": "Hindistan", "TW": "Tayvan",
    "HK": "Hong Kong", "TH": "Tayland", "VN": "Vietnam", "PL": "Polonya",
    "CZ": "Çek Cumhuriyeti", "RO": "Romanya", "BG": "Bulgaristan",
    "GR": "Yunanistan", "PT": "Portekiz", "NL": "Hollanda", "BE": "Belçika",
    "CH": "İsviçre", "AT": "Avusturya", "SE": "İsveç", "NO": "Norveç",
    "DK": "Danimarka", "FI": "Finlandiya", "RU": "Rusya", "UA": "Ukrayna",
    "AE": "Birleşik Arap Emirlikleri", "SA": "Suudi Arabistan", "IL": "İsrail",
    "EG": "Mısır", "ZA": "Güney Afrika", "BR": "Brezilya", "MX": "Meksika",
    "CA": "Kanada", "AU": "Avustralya", "NZ": "Yeni Zelanda", "SG": "Singapur",
    "MY": "Malezya", "ID": "Endonezya", "PH": "Filipinler", "PK": "Pakistan",
    "BD": "Bangladeş", "AZ": "Azerbaycan",
}
+
# Barcode prefix -> country / issuer label (EAN-13 style prefixes).
# A key is either a literal prefix ("868", "TYB") or an inclusive numeric range
# written "start-end" ("690-699"). _detect_barcode_country() walks this dict in
# insertion order and returns the first hit, so entry order is significant:
# the "30-37" range is tested before "370", "372", "373", "375", "377" and
# "379", which means those later keys can never be reached.
# NOTE(review): confirm the prefix assignments against the current GS1 list.
BARCODE_COUNTRIES = {
    "TYB": "Trendyol (İç Barkod)", "SGT": "Trendyol Satıcı",
    "KPE": "Trendyol Kampanya", "RTN": "Trendyol İade", "CDM": "Trendyol Özel",
    "00-13": "ABD & Kanada", "190-199": "Rezerve/Özel Kullanım",
    "20-29": "Mağaza İçi Kullanım", "30-37": "Fransa",
    "380": "Bulgaristan", "383": "Slovenya", "370": "Litvanya",
    "372": "Estonya", "373": "Moldova", "375": "Belarus",
    "377": "Ermenistan", "379": "Kazakistan", "385": "Hırvatistan",
    "387": "Bosna Hersek", "400-440": "Almanya", "45-49": "Japonya",
    "50": "İngiltere", "520-521": "Yunanistan", "528": "Lübnan",
    "529": "Kıbrıs", "530": "Arnavutluk", "531": "Makedonya",
    "535": "Malta", "539": "İrlanda", "54": "Belçika & Lüksemburg",
    "560": "Portekiz", "569": "İzlanda", "57": "Danimarka",
    "590": "Polonya", "594": "Romanya", "599": "Macaristan",
    "600-601": "Güney Afrika", "603": "Gana", "608": "Bahreyn",
    "609": "Mauritius", "611": "Fas", "613": "Cezayir",
    "615": "Nijerya", "616": "Kenya", "618": "Fildişi Sahili",
    "619": "Tunus", "621": "Suriye", "622": "Mısır",
    "624": "Libya", "625": "Ürdün", "626": "İran",
    "627": "Kuveyt", "628": "Suudi Arabistan", "629": "BAE",
    "630": "Katar", "631": "Umman", "64": "Finlandiya",
    "690-699": "Çin", "70": "Norveç", "710-719": "Rezerve/Özel Kullanım",
    "729": "İsrail", "73": "İsveç", "740": "Guatemala",
    "741": "El Salvador", "742": "Honduras", "743": "Nikaragua",
    "744": "Kosta Rika", "745": "Panama", "746": "Dominik Cumhuriyeti",
    "750": "Meksika", "754-755": "Kanada", "759": "Venezuela",
    "76": "İsviçre", "770-771": "Kolombiya", "773": "Uruguay",
    "775": "Peru", "777": "Bolivya", "779": "Arjantin",
    "780": "Şili", "784": "Paraguay", "786": "Ekvador",
    "789-790": "Brezilya", "80-83": "İtalya", "84": "İspanya",
    "850": "Küba", "858": "Slovakya", "859": "Çek Cumhuriyeti",
    "860": "Sırbistan", "865": "Moğolistan", "867": "Kuzey Kore",
    "868-869": "Türkiye", "87": "Hollanda", "880": "Güney Kore",
    "884": "Kamboçya", "885": "Tayland", "888": "Singapur",
    "890": "Hindistan", "893": "Vietnam", "896": "Pakistan",
    "899": "Endonezya", "90-91": "Avusturya", "93": "Avustralya",
    "94": "Yeni Zelanda", "955": "Malezya", "958": "Makao",
    "977": "Süreli Yayınlar (ISSN)", "978-979": "Kitaplar (ISBN)",
    "980": "Para İade Kuponları", "981-984": "Kuponlar", "99": "Kuponlar",
}
+
+
+# ─────────────────────────────────────────────────────────
+# Yardımcı fonksiyonlar
+# ─────────────────────────────────────────────────────────
+
+def _extract_price(p):
+ """Extract selling price from product, handling both old and Search API formats."""
+ pr = p.get("price", {})
+ if isinstance(pr, (int, float)):
+ return pr
+ return (pr.get("sellingPrice") or pr.get("discountedPrice")
+ or pr.get("current") or pr.get("originalPrice")
+ or pr.get("old") or 0)
+
+
+def _extract_rating(p):
+ """Extract average rating from product."""
+ rating = p.get("ratingScore") or p.get("rating", 0)
+ if isinstance(rating, dict):
+ rating = rating.get("averageRating", 0)
+ try:
+ return float(rating) if rating else 0.0
+ except (ValueError, TypeError):
+ return 0.0
+
+
+def _extract_review_count(p):
+ """Extract review/comment count from product."""
+ review_count = 0
+ try:
+ review_count = int(p.get("rating_count", 0) or 0)
+ except (ValueError, TypeError, AttributeError):
+ pass
+ if not review_count:
+ try:
+ rating_obj = p.get("ratingScore") or p.get("rating", {})
+ if isinstance(rating_obj, dict):
+ review_count = int(
+ rating_obj.get("totalCount", 0)
+ or rating_obj.get("totalComments", 0)
+ or 0
+ )
+ except (ValueError, TypeError, AttributeError):
+ review_count = 0
+ return review_count
+
+
+def _parse_social_value(value_str):
+ """Parse social proof value like '642', '1.2k', '10B+' etc."""
+ try:
+ s = str(value_str).strip()
+ if "k" in s.lower():
+ return int(float(s.lower().replace("k", "").replace("+", "")) * 1000)
+ if "b+" in s.lower():
+ return int(float(s.lower().replace("b+", "")) * 1_000_000_000)
+ if "m+" in s.lower():
+ return int(float(s.lower().replace("m+", "")) * 1_000_000)
+ return int(s.replace("+", ""))
+ except (ValueError, TypeError):
+ return 0
+
+
def _detect_barcode_country(prefix_num):
    """Map a barcode prefix to a label using ``BARCODE_COUNTRIES``.

    Entries are tried in the table's insertion order and the first match wins.
    A literal key matches when the prefix starts with it. A ``"start-end"`` key
    matches when the leading ``len(start)`` characters of the prefix are ASCII
    digits whose integer value lies in the inclusive range.

    A prefix shorter than the range width is never range-matched, so a
    one-character prefix such as ``"5"`` no longer falls into ``"00-13"``.

    Args:
        prefix_num: leading characters of a barcode (callers pass three).
    Returns:
        The mapped label, or ``"Bilinmiyor"`` when nothing matches.
    """
    prefix = str(prefix_num or "")
    for key, country in BARCODE_COUNTRIES.items():
        start, dash, end = key.partition("-")
        if not dash:
            if prefix.startswith(key):
                return country
            continue

        width = len(start)
        head = prefix[:width]
        if len(head) < width or not (head.isascii() and head.isdigit()):
            continue
        try:
            if int(start) <= int(head) <= int(end):
                return country
        except ValueError:
            # Malformed range key in the table: skip it rather than fail.
            continue
    return "Bilinmiyor"
+
+
+# ─────────────────────────────────────────────────────────
+# 1. normalize_product
+# ─────────────────────────────────────────────────────────
+
def _inline_social_counts(raw_product):
    """Read orders/page_views/baskets/favorites from inline ``socialProofs``."""
    counts = {"orders": 0, "page_views": 0, "baskets": 0, "favorites": 0}
    field_by_type = {
        "orderCountL3D": "orders",
        "pageViewCount": "page_views",
        "basketCount": "baskets",
        "favoriteCount": "favorites",
    }
    proofs = raw_product.get("socialProofs", [])
    if not isinstance(proofs, list):
        return counts
    for proof in proofs:
        if not isinstance(proof, dict):
            continue  # tolerate malformed entries instead of aborting
        proof_type = proof.get("type", "")
        field = field_by_type.get(proof_type) if isinstance(proof_type, str) else None
        if field:
            counts[field] = _parse_social_value(proof.get("value", "0"))
    return counts


def _extract_origin(raw_product):
    """Return ``(country_code, country_name)`` from the first merchant listing."""
    listings = raw_product.get("merchantListings") or []
    first = listings[0] if isinstance(listings, list) and listings else None
    if isinstance(first, dict):
        for custom_value in first.get("customValues") or []:
            if isinstance(custom_value, dict) and custom_value.get("key") == "origin":
                code = str(custom_value.get("value") or "").upper()
                name = COUNTRY_NAMES.get(
                    code, f"Diğer ({code})" if code else "Bilinmeyen"
                )
                return code, name
    return "", "Bilinmeyen"


def normalize_product(raw_product, category_name, social_details):
    """Flatten a raw product dict into the record used by the dashboard.

    Social-proof counters come from the product's inline ``socialProofs`` list
    first. Any counter that is still 0 afterwards is filled from
    ``social_details`` (the enrichment data), looked up by product id as a
    string and then as an int.

    ``None`` values in the raw data (name, barcode, origin value,
    ``customValues``, ``socialProofs`` entries) are normalised to empty
    defaults instead of raising.

    Args:
        raw_product: raw product dict as stored in a category file.
        category_name: display name of the category the product belongs to.
        social_details: mapping of product id -> counters dict; may be empty
            or ``None``.
    Returns:
        A flat dict with id, name, brand, category, price, rating, counters,
        barcode, origin, image/product URL and stock flag.
    """
    product_id = raw_product.get("contentId") or raw_product.get("id")
    price = _extract_price(raw_product)
    rating = _extract_rating(raw_product)
    review_count = _extract_review_count(raw_product)

    brand = raw_product.get("brand", {})
    brand_name = (brand.get("name") if isinstance(brand, dict) else brand) or "Bilinmeyen"

    # Inline counters win; enrichment data only fills the gaps.
    counts = _inline_social_counts(raw_product)
    if product_id and social_details:
        pid_text = str(product_id)
        # Keys are strings when loaded from file and ints when passed in memory.
        enriched = (social_details.get(pid_text)
                    or social_details.get(int(pid_text) if pid_text.isdigit() else -1)
                    or {})
        if isinstance(enriched, dict):
            for field in list(counts):
                if not counts[field]:
                    counts[field] = enriched.get(field, 0) or 0

    image_url = raw_product.get("imageUrl", "")
    if not image_url:
        images = raw_product.get("images", [])
        image_url = images[0] if isinstance(images, list) and images else ""

    product_url = raw_product.get("url", "")
    if not product_url and product_id:
        product_url = f"https://www.trendyol.com/p/{product_id}"

    barcode = ""
    winner_variant = raw_product.get("winnerVariant", {})
    if isinstance(winner_variant, dict):
        raw_barcode = winner_variant.get("barcode")
        # Always a string so later prefix slicing cannot fail.
        barcode = str(raw_barcode) if raw_barcode else ""

    country_code, country_name = _extract_origin(raw_product)

    return {
        "id": product_id,
        "name": raw_product.get("name") or "",
        "brand": brand_name,
        "category": category_name,
        "category_name": category_name,  # duplicate key kept for frontend tabs
        "price": round(price, 2) if price else 0,
        "rating": round(rating, 2),
        "review_count": review_count,
        "orders": counts["orders"],
        "page_views": counts["page_views"],
        "baskets": counts["baskets"],
        "favorites": counts["favorites"],
        "barcode": barcode,
        "country_code": country_code,
        "country": country_name,
        "image_url": image_url or "https://via.placeholder.com/150",
        "url": product_url,
        "in_stock": raw_product.get("inStock", False),
    }
+
+
+# ─────────────────────────────────────────────────────────
+# 2. calculate_kpis
+# ─────────────────────────────────────────────────────────
+
def calculate_kpis(products):
    """Compute headline KPIs over normalized products.

    Only positive prices and positive ratings take part in the price/rating
    statistics. The placeholder brand ``"Bilinmeyen"`` is not counted as a
    brand. A rating below 3.0 counts as "low".

    Returns:
        Dict with product/brand/subcategory totals, average/median/min/max
        price, average rating and low-rating count and rate (percent).
    """
    prices, ratings = [], []
    brands, subcategories = set(), set()
    for item in products:
        if item["price"] > 0:
            prices.append(item["price"])
        if item["rating"] > 0:
            ratings.append(item["rating"])
        brand = item["brand"]
        if brand and brand != "Bilinmeyen":
            brands.add(brand)
        if item["category"]:
            subcategories.add(item["category"])

    if prices:
        avg_price = sum(prices) / len(prices)
        median_price = float(np.percentile(prices, 50))
        min_price, max_price = min(prices), max(prices)
    else:
        avg_price = median_price = min_price = max_price = 0

    low_rating_count = len([r for r in ratings if r < 3.0])
    if ratings:
        avg_rating = sum(ratings) / len(ratings)
        low_rating_rate = low_rating_count / len(ratings) * 100
    else:
        avg_rating = low_rating_rate = 0

    return {
        "total_products": len(products),
        "total_subcategories": len(subcategories),
        "total_brands": len(brands),
        "avg_price": round(avg_price, 2),
        "median_price": round(median_price, 2),
        "avg_rating": round(avg_rating, 2),
        "low_rating_count": low_rating_count,
        "low_rating_rate": round(low_rating_rate, 2),
        "min_price": round(min_price, 2),
        "max_price": round(max_price, 2),
    }
+
+
+# ─────────────────────────────────────────────────────────
+# 3. calculate_charts
+# ─────────────────────────────────────────────────────────
+
def _histogram(values, bounds, overflow_label):
    """Count *values* into labelled buckets.

    ``bounds`` is an ordered sequence of ``(exclusive_upper_limit, label)``
    pairs. A value lands in the first bucket whose limit it is below, or in
    ``overflow_label`` when it is below none of them.
    """
    counts = {label: 0 for _, label in bounds}
    counts[overflow_label] = 0
    for value in values:
        label = next((name for limit, name in bounds if value < limit), overflow_label)
        counts[label] += 1
    return counts


def _share(part, whole):
    """Return ``part / whole`` as a percentage rounded to 2 places (0 if *whole* is 0)."""
    return round(part / whole * 100, 2) if whole else 0


def _brand_price_boxplot(brand_names, by_brand):
    """Five-number price summary per brand; brands with < 4 priced items are skipped."""
    stats = []
    for brand_name in brand_names:
        brand_prices = [p["price"] for p in by_brand[brand_name] if p["price"] > 0]
        if len(brand_prices) < 4:
            continue
        pcts = np.percentile(brand_prices, [0, 25, 50, 75, 100])
        stats.append({
            "brand": brand_name,
            "min": round(float(pcts[0]), 2),
            "q1": round(float(pcts[1]), 2),
            "median": round(float(pcts[2]), 2),
            "q3": round(float(pcts[3]), 2),
            "max": round(float(pcts[4]), 2),
            "count": len(brand_prices),
        })
    return stats


def _brand_strength(brand_names, by_brand, total_products):
    """Score = market share % + 5 * average rating - stock-out %, best first."""
    scores = []
    for brand_name in brand_names:
        items = by_brand[brand_name]
        brand_count = len(items)
        share = (brand_count / total_products * 100) if total_products > 0 else 0
        rated = [p["rating"] for p in items if p["rating"] > 0]
        avg_rating = sum(rated) / len(rated) if rated else 0
        out_of_stock = sum(1 for p in items if not p["in_stock"])
        stockout_rate = (out_of_stock / brand_count * 100) if brand_count > 0 else 0
        scores.append({
            "brand": brand_name,
            "share": round(share, 2),
            "avg_rating": round(avg_rating, 2),
            "stockout_rate": round(stockout_rate, 2),
            "strength_score": round(share + (avg_rating * 5) - stockout_rate, 2),
        })
    scores.sort(key=lambda x: x["strength_score"], reverse=True)
    return scores


def _category_price_premium(by_category, overall_avg):
    """Per-category price stats with premium (%) over the overall average price."""
    analysis = []
    for cat_name, cat_products in by_category.items():
        cat_prices = [p["price"] for p in cat_products if p["price"] > 0]
        if not cat_prices:
            continue
        cat_avg = sum(cat_prices) / len(cat_prices)
        premium = ((cat_avg - overall_avg) / overall_avg * 100) if overall_avg > 0 else 0
        analysis.append({
            "category": cat_name,
            "avg_price": round(cat_avg, 2),
            "median_price": round(float(np.percentile(cat_prices, 50)), 2),
            "price_premium": round(premium, 2),
            "product_count": len(cat_prices),
            "min_price": round(min(cat_prices), 2),
            "max_price": round(max(cat_prices), 2),
        })
    analysis.sort(key=lambda x: x["price_premium"], reverse=True)
    most_expensive = [c for c in analysis if c["price_premium"] > 0][:10]
    # The list is sorted descending, so the tail holds the cheapest categories;
    # reverse it to present the cheapest first.
    most_affordable = [c for c in analysis if c["price_premium"] < 0][-10:]
    most_affordable.reverse()
    return {
        "all_categories": analysis,
        "most_expensive": most_expensive,
        "most_affordable": most_affordable,
    }


def _origin_analysis(products, total_products):
    """Distribution of products over their declared origin country code."""
    origin_counts = defaultdict(int)
    for p in products:
        if p["country_code"]:
            origin_counts[p["country_code"]] += 1
    with_origin = sum(origin_counts.values())
    countries = sorted(
        [
            {
                "country_code": code,
                "country_name": COUNTRY_NAMES.get(code, f"Diğer ({code})"),
                "product_count": count,
                "percentage": _share(count, with_origin),
            }
            for code, count in origin_counts.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )
    return {
        "countries": countries,
        "top_countries": countries[:10],
        "total_products_with_origin": with_origin,
        "coverage_percentage": _share(with_origin, total_products),
    }


def _barcode_analysis(products, total_products):
    """Distribution of 3-character barcode prefixes and the countries they map to."""
    prefix_counts = defaultdict(int)
    for p in products:
        barcode = p.get("barcode", "")
        if barcode and len(barcode) >= 3:
            prefix_counts[barcode[:3]] += 1
    with_barcode = sum(prefix_counts.values())

    # Resolve each distinct prefix once instead of once per product.
    prefix_country = {prefix: _detect_barcode_country(prefix) for prefix in prefix_counts}
    country_counts = defaultdict(int)
    for prefix, count in prefix_counts.items():
        country_counts[prefix_country[prefix]] += count

    prefixes = sorted(
        [
            {
                "prefix": prefix,
                "detected_country": prefix_country[prefix],
                "product_count": count,
                "percentage": _share(count, with_barcode),
            }
            for prefix, count in prefix_counts.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )[:20]
    countries = sorted(
        [
            {
                "country_name": country,
                "product_count": count,
                "percentage": _share(count, with_barcode),
            }
            for country, count in country_counts.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )
    return {
        "prefixes": prefixes,
        "countries_from_barcode": countries,
        "top_countries_from_barcode": countries[:10],
        "total_products_with_barcode": with_barcode,
        "coverage_percentage": _share(with_barcode, total_products),
    }


def calculate_charts(products):
    """Build all chart datasets for the dashboard from normalized products.

    Args:
        products: list of flat product dicts as produced by
            ``normalize_product``.
    Returns:
        Dict with price/rating distributions, top categories (by orders) and
        brands (by product count), brand price boxplot, price-vs-rating
        scatter sample, brand strength scores, brand x category heatmap,
        category price premium, origin analysis and barcode analysis.

    Merchant statistics are not produced here: they need the raw
    ``merchantListings`` data, which the normalized records no longer carry.
    """
    total_products = len(products)
    prices = [p["price"] for p in products if p["price"] > 0]

    # Group once; every per-brand / per-category chart below reuses these.
    by_category = defaultdict(list)
    by_brand = defaultdict(list)
    for p in products:
        by_category[p["category"]].append(p)
        by_brand[p["brand"]].append(p)

    price_ranges = _histogram(
        prices,
        ((100, "0-100"), (250, "100-250"), (500, "250-500"), (1000, "500-1000")),
        "1000+",
    )
    # All products are bucketed here, so unrated ones (rating 0) count as "0-1".
    rating_distribution = _histogram(
        (p["rating"] for p in products),
        ((1, "0-1"), (2, "1-2"), (3, "2-3"), (4, "3-4")),
        "4-5",
    )

    top_categories = sorted(
        [
            {
                "name": cat_name,
                "count": len(items),
                "total_orders": sum(p["orders"] for p in items),
            }
            for cat_name, items in by_category.items()
        ],
        key=lambda x: x["total_orders"], reverse=True,
    )[:20]
    top_brands = sorted(
        [{"name": brand, "count": len(items)} for brand, items in by_brand.items()],
        key=lambda x: x["count"], reverse=True,
    )[:20]
    top_10_brands = [b["name"] for b in top_brands[:10]]
    top_10_cats = [c["name"] for c in top_categories[:10]]

    # Scatter: sample among plottable points so the 500-point budget is not
    # spent on products without a price or rating.
    plottable = [p for p in products if p["price"] > 0 and p["rating"] > 0]
    scatter_data = [
        {
            "price": p["price"],
            "rating": p["rating"],
            "brand": p["brand"],
            "in_stock": p["in_stock"],
        }
        for p in random.sample(plottable, min(500, len(plottable)))
    ]

    # Heatmap: product count per (top category, top brand) pair, zeros omitted.
    heatmap_data = []
    for cat_name in top_10_cats:
        brand_counts = defaultdict(int)
        for p in by_category.get(cat_name, []):
            brand_counts[p["brand"]] += 1
        for brand_name in top_10_brands:
            count = brand_counts.get(brand_name, 0)
            if count > 0:
                heatmap_data.append({"brand": brand_name, "category": cat_name, "value": count})

    overall_avg_price = sum(prices) / len(prices) if prices else 0

    return {
        "price_distribution": price_ranges,
        "top_categories": top_categories,
        "top_brands": top_brands,
        "rating_distribution": rating_distribution,
        "brand_price_boxplot": _brand_price_boxplot(top_10_brands, by_brand),
        "price_rating_scatter": scatter_data,
        "brand_strength": _brand_strength(top_10_brands, by_brand, total_products),
        "brand_category_heatmap": heatmap_data,
        "category_price_premium": _category_price_premium(by_category, overall_avg_price),
        "origin_analysis": _origin_analysis(products, total_products),
        "barcode_analysis": _barcode_analysis(products, total_products),
    }
+
+
def _calculate_merchant_analysis(raw_products, categories_data):
    """Aggregate per-merchant statistics from raw product data.

    Only the first entry of each product's ``merchantListings`` is considered.
    Products without a listing or without a merchant id are ignored.

    ``avg_price`` is the mean over the merchant's products that have a
    positive price. Unpriced products no longer drag the average down.

    Args:
        raw_products: list of raw product dicts (must still carry
            ``merchantListings``).
        categories_data: unused; kept so existing callers keep working.
    Returns:
        Dict with the full merchant list (sorted by product count, descending),
        the top 20, totals, winner percentage and coverage percentage.
    """
    merchants = {}
    total_winners = 0
    products_with_merchant = 0

    for product in raw_products:
        listings = product.get("merchantListings") or []
        first = listings[0] if isinstance(listings, list) and listings else None
        if not isinstance(first, dict):
            continue
        merchant = first.get("merchant") or {}
        merchant_id = merchant.get("id") if isinstance(merchant, dict) else None
        if not merchant_id:
            continue

        products_with_merchant += 1
        entry = merchants.get(merchant_id)
        if entry is None:
            entry = merchants[merchant_id] = {
                "merchant_name": (merchant.get("name")
                                  or merchant.get("officialName")
                                  or f"Satıcı {merchant_id}"),
                "product_count": 0,
                "priced_count": 0,
                "total_price": 0,
                "winner_count": 0,
            }

        entry["product_count"] += 1
        price = _extract_price(product)
        if isinstance(price, (int, float)) and price > 0:
            entry["total_price"] += price
            entry["priced_count"] += 1
        if first.get("isWinner"):
            entry["winner_count"] += 1
            total_winners += 1

    merchant_list = []
    for mid, entry in merchants.items():
        name = entry["merchant_name"]
        avg_price = entry["total_price"] / entry["priced_count"] if entry["priced_count"] else 0
        winner_ratio = entry["winner_count"] / entry["product_count"] * 100
        merchant_url = None
        # Placeholder names ("Satıcı <id>") have no store page to link to.
        if name and not name.startswith("Satıcı "):
            merchant_url = f"https://www.trendyol.com/magaza/{name.lower().replace(' ', '-')}-m-{mid}"
        merchant_list.append({
            "merchant_id": mid,
            "merchant_name": name,
            "merchant_url": merchant_url,
            "product_count": entry["product_count"],
            "avg_price": round(avg_price, 2),
            "winner_count": entry["winner_count"],
            "winner_ratio": round(winner_ratio, 2),
        })
    merchant_list.sort(key=lambda x: x["product_count"], reverse=True)

    total_products = len(raw_products)
    winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0

    return {
        "merchants": merchant_list,
        "top_merchants": merchant_list[:20],
        "total_merchants": len(merchants),
        "total_products_with_merchant": products_with_merchant,
        "total_winners": total_winners,
        "winner_percentage": round(winner_percentage, 2),
        "coverage_percentage": round(products_with_merchant / total_products * 100, 2) if total_products else 0,
    }
+
+
+# ─────────────────────────────────────────────────────────
+# 4. calculate_insights
+# ─────────────────────────────────────────────────────────
+
def calculate_insights(products):
    """Return low-rated products and price outliers.

    * ``low_rating_products``: up to 20 products with ``0 < rating < 3.0``,
      lowest rating first.
    * ``anomalies``: up to 20 products whose positive price lies outside
      ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``, highest price first. Computed only
      when more than four positive prices exist.

    Names are truncated to 50 characters. A missing or ``None`` name is
    reported as an empty string instead of raising.
    """
    def short_name(product):
        return str(product.get("name") or "")[:50]

    low_rating = [
        {
            "name": short_name(p),
            "brand": p["brand"],
            "rating": p["rating"],
            "price": p["price"],
            "in_stock": p["in_stock"],
        }
        for p in products
        if 0 < p["rating"] < 3.0
    ]
    low_rating = sorted(low_rating, key=lambda x: x["rating"])[:20]

    prices = [p["price"] for p in products if p["price"] > 0]
    anomalies = []
    if len(prices) > 4:
        q1, q3 = np.percentile(prices, [25, 75])
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr
        for p in products:
            price = p["price"]
            if price > 0 and (price < lower or price > upper):
                anomalies.append({
                    "name": short_name(p),
                    "brand": p["brand"],
                    "price": price,
                    "type": "expensive" if price > upper else "cheap",
                })
        anomalies = sorted(anomalies, key=lambda x: x["price"], reverse=True)[:20]

    return {"low_rating_products": low_rating, "anomalies": anomalies}
+
+
+# ─────────────────────────────────────────────────────────
+# 5. build_consolidated_report (ana orkestratör)
+# ─────────────────────────────────────────────────────────
+
def _load_social_details(report_id, reports_dir, social_data):
    """Return the ``details`` mapping of the social-proof enrichment data.

    Uses *social_data* when given. Otherwise reads
    ``<reports_dir>/enrich_<report_id>/social.json`` if it exists. Read or
    parse problems are logged and yield an empty mapping (best effort).
    """
    if social_data:
        return social_data.get("details", {})
    social_file = os.path.join(reports_dir, f"enrich_{report_id}", "social.json")
    if not os.path.exists(social_file):
        return {}
    try:
        with open(social_file, "r", encoding="utf-8") as f:
            return json.load(f).get("details", {})
    except Exception as e:
        log.warning(f"Social proof dosyası okunamadı: {e}")
        return {}


def build_consolidated_report(report_id, db, reports_dir, social_data=None):
    """Load a report's raw data, normalize it and compute all dashboard data.

    Each category file is processed as a unit. If reading or normalizing it
    fails, none of its products are used, so the normalized list and the raw
    list fed to the merchant analysis always describe the same files. Product
    entries that are not dicts are ignored.

    Args:
        report_id: database id of the report.
        db: SQLAlchemy session used to look up the ``Report`` row.
        reports_dir: path of the ``reports/`` directory.
        social_data: optional enrichment data; when omitted it is read from
            the report's ``social.json`` file.
    Returns:
        The consolidated dashboard dict, or ``None`` when the report, its JSON
        file, or any usable product is missing.
    """
    # NOTE(review): imported lazily, presumably to avoid an import cycle with
    # the database module; confirm before hoisting it to module level.
    from database import Report
    t0 = time.time()

    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        return None
    if not report.json_file_path or not os.path.exists(report.json_file_path):
        return None

    with open(report.json_file_path, "r", encoding="utf-8") as f:
        report_data = json.load(f)

    social_details = _load_social_details(report_id, reports_dir, social_data)

    normalized_products = []
    raw_products_all = []  # raw records are still needed for merchant analysis

    for detail in report_data.get("details", []):
        if not detail.get("success") or not detail.get("file_path"):
            continue
        file_path = detail["file_path"]
        if not os.path.exists(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                cat_data = json.load(f)
            listed = cat_data.get("products") if isinstance(cat_data, dict) else None
            raw_products = [raw for raw in (listed or []) if isinstance(raw, dict)]
            # Drop a trailing numeric suffix such as "Elbise 2" -> "Elbise".
            cat_name = re.sub(r'\s+\d+$', '', detail.get("category_name", ""))

            file_normalized = []
            for raw in raw_products:
                # Stamp the cleaned category name onto the raw record too.
                if isinstance(raw.get("category"), dict):
                    raw["category"]["name"] = cat_name
                else:
                    raw["category"] = {"id": 0, "name": cat_name}

                norm = normalize_product(raw, cat_name, social_details)
                if norm["price"] and norm["category"]:
                    file_normalized.append(norm)
        except (json.JSONDecodeError, OSError, KeyError) as e:
            log.warning(f"Kategori dosyası okunamadı: {file_path}: {e}")
            continue

        # Commit both views of the file together, only after it fully succeeded.
        normalized_products.extend(file_normalized)
        raw_products_all.extend(raw_products)

    if not normalized_products:
        log.warning(f"Rapor {report_id} için ürün bulunamadı")
        return None

    kpis = calculate_kpis(normalized_products)
    charts = calculate_charts(normalized_products)
    insights = calculate_insights(normalized_products)
    charts["merchant_analysis"] = _calculate_merchant_analysis(raw_products_all, {})

    elapsed = time.time() - t0
    log.info(f"Rapor {report_id} konsolide edildi: {len(normalized_products)} ürün, {elapsed:.2f}s")

    return {
        "metadata": {
            "report_id": report_id,
            "report_name": report.name,
            "created_at": report.created_at.isoformat() if report.created_at else None,
            "total_products": len(normalized_products),
            "total_categories": kpis["total_subcategories"],
            "consolidated_at": datetime.now().isoformat(),
        },
        "report_id": report_id,
        "report_name": report.name,
        "products": normalized_products,
        "all_products": normalized_products,  # same list, kept for the frontend
        "kpis": kpis,
        "charts": charts,
        "insights": insights,
    }
+
+
+# ─────────────────────────────────────────────────────────
+# 6. save / load
+# ─────────────────────────────────────────────────────────
+
def save_consolidated_report(report_id, data, reports_dir):
    """Write consolidated data to ``<reports_dir>/report_<id>_data.json``.

    The JSON is first written to a temporary sibling file and then moved into
    place with ``os.replace``. A failure while serializing (for example a
    non-serializable value) therefore leaves any previously saved report
    untouched instead of truncating it.

    Returns:
        The path of the written file.
    Raises:
        OSError / TypeError / ValueError: propagated from file access or
            ``json.dump``; the temporary file is removed first.
    """
    path = os.path.join(reports_dir, f"report_{report_id}_data.json")
    parent = os.path.dirname(path)
    if parent:
        # os.makedirs("") raises, so skip it when writing to the cwd.
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        os.replace(tmp_path, path)
    except Exception:
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # nothing to clean up, or cleanup itself failed
        raise

    log.info(f"Konsolide rapor kaydedildi: {path}")
    return path
+
+
def load_consolidated_report(report_id, reports_dir):
    """Read ``<reports_dir>/report_<id>_data.json``.

    Returns the parsed JSON, or ``None`` when the file does not exist or
    cannot be read/parsed (the latter is logged as a warning).
    """
    path = os.path.join(reports_dir, f"report_{report_id}_data.json")
    if not os.path.exists(path):
        return None
    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except (json.JSONDecodeError, OSError) as e:
        log.warning(f"Konsolide dosya okunamadı: {path}: {e}")
        return None
    return payload
diff --git a/backend/database.py b/backend/database.py
index 8d1753d..9207798 100644
--- a/backend/database.py
+++ b/backend/database.py
@@ -6,6 +6,9 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime
import os
+from logging_config import get_logger
+
+log = get_logger("db")
# PostgreSQL database - configurable via environment variable
# Default: Local PostgreSQL for development
@@ -26,6 +29,7 @@ class Category(Base):
parent_id = Column(Integer, ForeignKey('categories.id'), nullable=True)
trendyol_category_id = Column(Integer, nullable=True)
trendyol_url = Column(String, nullable=True)
+ path_model = Column(String, nullable=True) # URL slug for search API (e.g. "elbise-x-c56")
is_active = Column(Boolean, default=True)
created_at = Column(DateTime, default=datetime.utcnow)
@@ -86,7 +90,7 @@ class EnrichmentError(Base):
def init_db():
"""Initialize database - create tables"""
Base.metadata.create_all(bind=engine)
- print("✅ Database initialized successfully!")
+ log.info("Database initialized successfully")
def get_db():
diff --git a/backend/google_trends_helper.py b/backend/google_trends_helper.py
index 871f769..c65e8df 100644
--- a/backend/google_trends_helper.py
+++ b/backend/google_trends_helper.py
@@ -8,6 +8,9 @@ from pytrends.request import TrendReq
from typing import Dict, Optional
from datetime import datetime, timedelta
import time
+from logging_config import get_logger
+
+log = get_logger("trends")
class GoogleTrendsCache:
@@ -135,12 +138,12 @@ def fetch_google_trends(product_name: str, retries: int = 3) -> Dict:
except Exception as e:
error_msg = str(e)
- print(f"Google Trends API Error (attempt {attempt + 1}/{retries}): {error_msg}")
+ log.warning(f"Google Trends API Error (attempt {attempt + 1}/{retries}): {error_msg}")
# Rate limit error - wait longer
if '429' in error_msg or 'rate' in error_msg.lower():
wait_time = 5 * (attempt + 1) # 5, 10, 15 seconds
- print(f"Rate limited. Waiting {wait_time} seconds...")
+ log.warning(f"Rate limited. Waiting {wait_time} seconds...")
time.sleep(wait_time)
continue
diff --git a/backend/logging_config.py b/backend/logging_config.py
new file mode 100644
index 0000000..cb50f0c
--- /dev/null
+++ b/backend/logging_config.py
@@ -0,0 +1,197 @@
+"""
+Structured Logging Configuration for Trendyol Product Dashboard
+
+Provides:
+- JSON structured logs to file (for machine parsing)
+- Colored console logs (for human reading)
+- Correlation ID tracking per request/report
+- Rotating file handlers with size limits
+- Timing context manager for operation profiling
+"""
+
+import logging
+import logging.handlers
+import json
+import os
+import time
+from contextvars import ContextVar
+from contextlib import contextmanager
+from datetime import datetime, timezone
+
+# ---------------------------------------------------------------------------
+# Context variables for log correlation
+# ---------------------------------------------------------------------------
+
+_correlation_id: ContextVar[str] = ContextVar("correlation_id", default="-")  # "-" = no ID bound in this context
+_report_id: ContextVar[str] = ContextVar("report_id", default="-")  # "-" = no report bound in this context
+
+
+def set_correlation_id(cid: str):  # bind `cid` as the correlation ID of the current context
+    _correlation_id.set(cid)  # read back through get_correlation_id(), e.g. by JSONFormatter
+
+
+def get_correlation_id() -> str:  # current correlation ID, or "-" when none has been set
+    return _correlation_id.get()
+
+
+def set_report_id(rid):  # bind the report ID that both formatters attach to log output
+    _report_id.set(str(rid) if rid is not None else "-")  # None resets to the "-" marker; anything else is stringified
+
+
+def get_report_id() -> str:  # current report ID as a string, or "-" when none has been set
+    return _report_id.get()
+
+
+# ---------------------------------------------------------------------------
+# JSON Formatter (file output)
+# ---------------------------------------------------------------------------
+
+class JSONFormatter(logging.Formatter):
+    """Render each record as a single JSON object (used by the file handlers)."""
+
+    def format(self, record: logging.LogRecord) -> str:
+        """Serialize `record`, the context IDs and whitelisted extras to a JSON string."""
+        log_entry = {
+            # Event time, not formatting time: every handler emits the same "ts".
+            "ts": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
+            "level": record.levelname,
+            "logger": record.name,
+            "msg": record.getMessage(),
+            "correlation_id": get_correlation_id(),
+            "report_id": get_report_id(),
+        }
+        # Copy whitelisted `extra=` attributes; None is treated as "not set".
+        for key in ("url", "status_code", "response_time_ms", "response_size",
+                    "error_type", "duration_ms", "cb_state", "failures",
+                    "batch_size", "product_count", "cache_size"):
+            val = getattr(record, key, None)
+            if val is not None:
+                log_entry[key] = val
+        # Attach the formatted traceback when the record carries one.
+        if record.exc_info and record.exc_info[0] is not None:
+            log_entry["exception"] = self.formatException(record.exc_info)
+        # default=str: extras that are not JSON-native are stringified, not raised on.
+        return json.dumps(log_entry, ensure_ascii=False, default=str)
+
+
+# ---------------------------------------------------------------------------
+# Console Formatter (colored, human-readable)
+# ---------------------------------------------------------------------------
+
+_LEVEL_COLORS = {  # levelname -> ANSI escape prefix applied by ConsoleFormatter
+    "DEBUG": "\033[36m",     # cyan
+    "INFO": "\033[32m",      # green
+    "WARNING": "\033[33m",   # yellow
+    "ERROR": "\033[31m",     # red
+    "CRITICAL": "\033[1;31m",  # bold red
+}
+_RESET = "\033[0m"  # appended after the message to end the colored span
+
+
+class ConsoleFormatter(logging.Formatter):
+    """Render records as single ANSI-colored lines for the console."""
+
+    def format(self, record: logging.LogRecord) -> str:
+        """Return '<HH:MM:SS> [<L>][ [r:<report>]] <msg>' wrapped in the level color."""
+        color = _LEVEL_COLORS.get(record.levelname, "")  # unknown level: no color prefix
+        # Local wall-clock time of the event itself, not of this format() call.
+        ts = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
+        level = record.levelname[0]  # D, I, W, E, C
+        report = get_report_id()
+        # "-" is the ContextVar default, i.e. no report bound: omit the tag.
+        report_tag = f" [r:{report}]" if report != "-" else ""
+        base = f"{color}{ts} [{level}]{report_tag} {record.getMessage()}{_RESET}"
+        # The traceback is appended after the reset code, so it is not colored.
+        if record.exc_info and record.exc_info[0] is not None:
+            base += "\n" + self.formatException(record.exc_info)
+        return base
+
+
+# ---------------------------------------------------------------------------
+# Setup function
+# ---------------------------------------------------------------------------
+
+def setup_logging(log_dir: str = None):
+    """
+    Wire up the handlers of the "trendyol" logger tree. Call once at startup.
+
+    Args:
+        log_dir: Directory for the log files. Defaults to "../logs" relative
+            to this module; created if it does not exist.
+
+    Handlers attached:
+    - <log_dir>/trendyol.log (all levels, JSON, 10MB x 5 rotation)
+    - <log_dir>/errors.log (WARNING+, JSON, 10MB x 3 rotation)
+    - console output (INFO+, colored)
+
+    Returns early without adding handlers if the logger already has some.
+    """
+    if log_dir is None:
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        log_dir = os.path.join(module_dir, "..", "logs")
+    os.makedirs(log_dir, exist_ok=True)
+
+    app_logger = logging.getLogger("trendyol")
+    # Logger stays wide open; each handler's own level does the filtering.
+    app_logger.setLevel(logging.DEBUG)
+
+    # Already configured (e.g. on reload): do not stack duplicate handlers.
+    if app_logger.handlers:
+        return
+
+    max_bytes = 10 * 1024 * 1024  # 10 MB per file before rotating
+    json_fmt = JSONFormatter()
+
+    # (file name, minimum level, backup count), attached in this order.
+    file_specs = (
+        ("trendyol.log", logging.DEBUG, 5),
+        ("errors.log", logging.WARNING, 3),
+    )
+    for file_name, min_level, backups in file_specs:
+        file_handler = logging.handlers.RotatingFileHandler(
+            os.path.join(log_dir, file_name),
+            maxBytes=max_bytes,
+            backupCount=backups,
+            encoding="utf-8",
+        )
+        file_handler.setLevel(min_level)
+        file_handler.setFormatter(json_fmt)
+        app_logger.addHandler(file_handler)
+
+    # Console: INFO and above, colored.
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(ConsoleFormatter())
+    app_logger.addHandler(console_handler)
+
+    # Quiet noisy libraries
+    for noisy in ("urllib3", "sqlalchemy", "sqlalchemy.engine"):
+        logging.getLogger(noisy).setLevel(logging.WARNING)
+
+
+# ---------------------------------------------------------------------------
+# Logger factory
+# ---------------------------------------------------------------------------
+
+def get_logger(name: str) -> logging.Logger:
+    """Return the namespaced child logger "trendyol.<name>"."""
+    return logging.getLogger(f"trendyol.{name}")
+
+
+# ---------------------------------------------------------------------------
+# Timing context manager
+# ---------------------------------------------------------------------------
+
+@contextmanager
+def log_timing(logger: logging.Logger, operation: str, level=logging.INFO, **extra):
+    """Log how long the wrapped block ran and whether it completed or raised (`extra` kwargs go on the record)."""
+    start = time.monotonic()
+    outcome = "failed"  # replaced below only when the body finishes without raising
+    try:
+        yield
+        outcome = "completed"
+    finally:
+        # Logged from `finally` so the duration is recorded on the error path too.
+        elapsed_ms = round((time.monotonic() - start) * 1000, 1)
+        record_fields = {"duration_ms": elapsed_ms, **extra}
+        logger.log(level, f"{operation} {outcome} in {elapsed_ms}ms", extra=record_fields)
diff --git a/backend/main.py b/backend/main.py
index 017c3d4..9b87e8a 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -24,8 +24,20 @@ import os
from database import SessionLocal, Category, Snapshot, Report, EnrichmentError, init_db
from google_trends_helper import estimate_traffic_sources, fetch_google_trends
+from logging_config import setup_logging, get_logger, set_correlation_id, set_report_id, log_timing
+
+# Initialize logging first, then database
+setup_logging()
+
+log_http = get_logger("http")
+log_scraper = get_logger("scraper")
+log_social = get_logger("social")
+log_cache = get_logger("cache")
+log_db = get_logger("db")
+log_sse = get_logger("sse")
+log_api = get_logger("api")
+log_keywords = get_logger("keywords")
-# Initialize database on startup
init_db()
# GS1 Barcode Prefix to Country Mapping (EAN-13 / EAN-8)
@@ -401,6 +413,17 @@ class BoundedCache:
del self.cache[oldest]
del self.timestamps[oldest]
+ def __setitem__(self, key, value):
+ """Support cache[key] = value syntax"""
+ self.set(key, value)
+
+ def __getitem__(self, key):
+ """Support cache[key] syntax"""
+ result = self.get(key)
+ if result is None:
+ raise KeyError(key)
+ return result
+
def __contains__(self, key):
"""Support 'in' operator for cache key checking"""
with self.lock:
@@ -425,6 +448,13 @@ enrichment_progress = BoundedCache(maxsize=50, ttl=7200)
# similar_cache = {}
# followers_cache = {}
+def _extract_price(p):
+    """Return the product's selling price (0 when absent); accepts the old and the Search API price shapes."""
+    pr = p.get("price") or {}  # a missing key and an explicit null both mean "no price info"
+    if not isinstance(pr, dict):
+        return pr if isinstance(pr, (int, float)) else 0  # a bare number is the price itself
+    return pr.get("sellingPrice") or pr.get("discountedPrice") or pr.get("current") or pr.get("originalPrice") or pr.get("old") or 0
+
def _chunked(seq, size):
for i in range(0, len(seq), size):
yield seq[i:i+size]
@@ -512,6 +542,7 @@ _retry_strategy = Retry(
_http_adapter = HTTPAdapter(max_retries=_retry_strategy, pool_connections=100, pool_maxsize=200)
_session = requests.Session()
_session.headers.update(TRENDYOL_HEADERS)
+_session.cookies.update({"storefrontId": "1", "language": "tr", "countryCode": "TR"})
_session.mount("https://", _http_adapter)
_DEFAULT_TIMEOUT = 30 # Longer timeout to avoid premature failures
@@ -530,11 +561,12 @@ class _RateLimiter:
return
sleep_for = self._next_time - now
self._next_time += self.min_interval
- # small jitter to avoid bursts
- time.sleep(max(0, sleep_for) + random.uniform(0.0, 0.05))
+ actual_sleep = max(0, sleep_for) + random.uniform(0.0, 0.05)
+ log_http.debug(f"Rate limiter sleeping {actual_sleep:.3f}s")
+ time.sleep(actual_sleep)
-_trendyol_limiter = _RateLimiter(rate_per_sec=5.0) # 0.2 seconds between requests (Optimized for localhost - 10x faster!)
+_trendyol_limiter = _RateLimiter(rate_per_sec=1.5) # ~0.67s between requests (safe for Trendyol rate limits)
# Circuit Breaker for Social Proof endpoint
@@ -555,6 +587,10 @@ class _CircuitBreaker:
if time.monotonic() - self._opened_at > self.reset_timeout:
self._failures = 0
self._opened_at = None
+ log_social.warning(
+ "Circuit breaker auto-reset to CLOSED (half-open recovery)",
+ extra={"cb_state": "closed", "failures": 0},
+ )
return False
return True
@@ -564,12 +600,27 @@ class _CircuitBreaker:
self._failures += 1
if self._failures >= self.failure_threshold and self._opened_at is None:
self._opened_at = time.monotonic()
+ log_social.critical(
+ f"Circuit breaker OPENED after {self._failures} consecutive failures",
+ extra={"cb_state": "open", "failures": self._failures},
+ )
+ else:
+ log_social.warning(
+ f"Circuit breaker failure #{self._failures}/{self.failure_threshold}",
+ extra={"cb_state": "degraded", "failures": self._failures},
+ )
def record_success(self):
"""Record a success and reset the circuit"""
with self._lock:
+ was_open = self._opened_at is not None
self._failures = 0
self._opened_at = None
+ if was_open:
+ log_social.info(
+ "Circuit breaker reset to CLOSED after success",
+ extra={"cb_state": "closed", "failures": 0},
+ )
def get_status(self) -> dict:
"""Get current circuit status"""
@@ -585,13 +636,47 @@ class _CircuitBreaker:
}
-_social_proof_breaker = _CircuitBreaker(failure_threshold=3, reset_timeout=300.0)
+_social_proof_breaker = _CircuitBreaker(failure_threshold=5, reset_timeout=60.0)
def _http_get(url: str, params: dict) -> requests.Response:
"""GET with shared session, retry, timeout, and rate limiting."""
_trendyol_limiter.wait()
- return _session.get(url, params=params, timeout=_DEFAULT_TIMEOUT)
+ start = time.monotonic()
+ try:
+ resp = _session.get(url, params=params, timeout=_DEFAULT_TIMEOUT)
+ elapsed_ms = round((time.monotonic() - start) * 1000, 1)
+ log_http.debug(
+ f"{resp.status_code} {url}",
+ extra={
+ "url": url,
+ "status_code": resp.status_code,
+ "response_time_ms": elapsed_ms,
+ "response_size": len(resp.content),
+ },
+ )
+ return resp
+ except requests.exceptions.Timeout:
+ elapsed_ms = round((time.monotonic() - start) * 1000, 1)
+ log_http.warning(
+ f"TIMEOUT {url} after {elapsed_ms}ms",
+ extra={"url": url, "error_type": "timeout", "response_time_ms": elapsed_ms},
+ )
+ raise
+ except requests.exceptions.ConnectionError as e:
+ elapsed_ms = round((time.monotonic() - start) * 1000, 1)
+ log_http.warning(
+ f"CONNECTION_ERROR {url}: {e}",
+ extra={"url": url, "error_type": "connection", "response_time_ms": elapsed_ms},
+ )
+ raise
+ except requests.exceptions.RequestException as e:
+ elapsed_ms = round((time.monotonic() - start) * 1000, 1)
+ log_http.error(
+ f"REQUEST_ERROR {url}: {e}",
+ extra={"url": url, "error_type": "request", "response_time_ms": elapsed_ms},
+ )
+ raise
from typing import Optional as _Optional
@@ -610,9 +695,9 @@ def _log_enrichment_error(db: Session, *, report_id: _Optional[int], product_id:
attempt=attempt
))
db.commit()
- except Exception:
- pass
+ except Exception as exc:
# Avoid crashing on logging failures
+ log_db.warning(f"Failed to persist enrichment error: {exc}", exc_info=True)
db.rollback()
def load_report_products(db: Session, report_id: int):
@@ -703,7 +788,7 @@ def fetch_product_reviews(product_id: int, page: int = 0, page_size: int = 5):
if resp.status_code == 200:
return resp.json()
except Exception as e:
- print(f"Review API error for product {product_id}: {e}")
+ log_social.warning(f"Review API error for product {product_id}: {e}")
return None
@@ -750,7 +835,7 @@ def fetch_social_proof(product_ids: list):
return {"result": result} if result else data
except Exception as e:
- print(f"Social Proof API error: {e}")
+ log_social.warning(f"Social Proof API error: {e}")
return None
@@ -773,7 +858,7 @@ def _parse_social_count(count_str: str) -> int:
# Try to parse as float
return int(float(clean))
- except:
+ except (ValueError, TypeError, AttributeError):
return 0
@@ -794,7 +879,7 @@ def fetch_merchant_questions(product_id: int, page: int = 0, page_size: int = 4)
if resp.status_code == 200:
return resp.json()
except Exception as e:
- print(f"Merchant Questions API error for product {product_id}: {e}")
+ log_social.warning(f"Merchant Questions API error for product {product_id}: {e}")
return None
@@ -817,7 +902,7 @@ def fetch_similar_products(product_id: int, page: int = 0, page_size: int = 8):
if resp.status_code == 200:
return resp.json()
except Exception as e:
- print(f"Similar Products API error for product {product_id}: {e}")
+ log_social.warning(f"Similar Products API error for product {product_id}: {e}")
return None
@@ -835,7 +920,7 @@ def fetch_merchant_followers(merchant_id: int):
if resp.status_code == 200:
return resp.json()
except Exception as e:
- print(f"Merchant Followers API error for merchant {merchant_id}: {e}")
+ log_social.warning(f"Merchant Followers API error for merchant {merchant_id}: {e}")
return None
@@ -845,6 +930,7 @@ class CategoryBase(BaseModel):
parent_id: Optional[int] = None
trendyol_category_id: Optional[int] = None
trendyol_url: Optional[str] = None
+ path_model: Optional[str] = None
is_active: bool = True
@@ -857,6 +943,7 @@ class CategoryUpdate(BaseModel):
parent_id: Optional[int] = None
trendyol_category_id: Optional[int] = None
trendyol_url: Optional[str] = None
+ path_model: Optional[str] = None
is_active: Optional[bool] = None
@@ -958,6 +1045,7 @@ def get_main_categories(db: Session = Depends(get_db)):
"parent_id": cat.parent_id,
"trendyol_category_id": cat.trendyol_category_id,
"trendyol_url": cat.trendyol_url,
+ "path_model": cat.path_model,
"is_active": cat.is_active,
"created_at": cat.created_at,
"children_count": children_count
@@ -984,6 +1072,7 @@ def get_category(category_id: int, db: Session = Depends(get_db)):
"parent_id": category.parent_id,
"trendyol_category_id": category.trendyol_category_id,
"trendyol_url": category.trendyol_url,
+ "path_model": category.path_model,
"is_active": category.is_active,
"created_at": category.created_at,
"children_count": children_count
@@ -1023,6 +1112,7 @@ def get_category_children(category_id: int, db: Session = Depends(get_db)):
"parent_id": cat.parent_id,
"trendyol_category_id": cat.trendyol_category_id,
"trendyol_url": cat.trendyol_url,
+ "path_model": cat.path_model,
"is_active": cat.is_active,
"created_at": cat.created_at,
"children_count": children_count
@@ -1048,6 +1138,7 @@ def create_category(category: CategoryCreate, db: Session = Depends(get_db)):
parent_id=category.parent_id,
trendyol_category_id=category.trendyol_category_id,
trendyol_url=category.trendyol_url,
+ path_model=category.path_model,
is_active=category.is_active
)
@@ -1061,6 +1152,7 @@ def create_category(category: CategoryCreate, db: Session = Depends(get_db)):
"parent_id": db_category.parent_id,
"trendyol_category_id": db_category.trendyol_category_id,
"trendyol_url": db_category.trendyol_url,
+ "path_model": db_category.path_model,
"is_active": db_category.is_active,
"created_at": db_category.created_at,
"children_count": 0
@@ -1090,6 +1182,8 @@ def update_category(category_id: int, category: CategoryUpdate, db: Session = De
db_category.trendyol_category_id = category.trendyol_category_id
if category.trendyol_url is not None:
db_category.trendyol_url = category.trendyol_url
+ if category.path_model is not None:
+ db_category.path_model = category.path_model
if category.is_active is not None:
db_category.is_active = category.is_active
@@ -1105,6 +1199,7 @@ def update_category(category_id: int, category: CategoryUpdate, db: Session = De
"parent_id": db_category.parent_id,
"trendyol_category_id": db_category.trendyol_category_id,
"trendyol_url": db_category.trendyol_url,
+ "path_model": db_category.path_model,
"is_active": db_category.is_active,
"created_at": db_category.created_at,
"children_count": children_count
@@ -1141,6 +1236,7 @@ class BulkCategoryItem(BaseModel):
parent_name: Optional[str] = None
trendyol_category_id: Optional[int] = None
trendyol_url: Optional[str] = None
+ path_model: Optional[str] = None
class BulkCategoryImport(BaseModel):
categories: List[BulkCategoryItem]
@@ -1179,6 +1275,7 @@ def bulk_import_categories(data: BulkCategoryImport, db: Session = Depends(get_d
parent_id=parent_id,
trendyol_category_id=item.trendyol_category_id,
trendyol_url=item.trendyol_url,
+ path_model=item.path_model,
is_active=True
)
db.add(db_cat)
@@ -1195,6 +1292,19 @@ def bulk_import_categories(data: BulkCategoryImport, db: Session = Depends(get_d
}
+@app.post("/categories/seed-from-json")
+def seed_from_json_endpoint(clear_existing: bool = True):
+    """Seed categories from trendyol_categories.json; responds 404 if the file is missing, 500 on any other failure."""
+    from category_seeder import seed_from_json
+    try:
+        return {"message": "Seed tamamlandı", **seed_from_json(clear_existing=clear_existing)}
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=404, detail="trendyol_categories.json not found") from exc
+    except Exception as exc:
+        log_api.exception("Category seeding from JSON failed")  # keep the traceback server-side
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
# Get all snapshots
@app.get("/snapshots", response_model=List[SnapshotResponse])
def get_snapshots(db: Session = Depends(get_db), skip: int = 0, limit: int = 100):
@@ -1261,9 +1371,9 @@ def get_category_products(category_id: int, db: Session = Depends(get_db)):
# Helper: recursively collect scrapable categories (those with trendyol_category_id)
def collect_scrapable_categories(db: Session, category_ids: list) -> list:
"""
- Given a list of category IDs, collect ALL leaf categories with valid trendyol_category_id.
+ Given a list of category IDs, collect ALL leaf categories with path_model or trendyol_category_id.
Always recurses into children to find every scrapable category in the tree.
- Returns list of (trendyol_category_id, name) tuples.
+ Returns list of (path_model, name, trendyol_category_id) tuples.
"""
result = []
seen = set()
@@ -1282,9 +1392,12 @@ def collect_scrapable_categories(db: Session, category_ids: list) -> list:
# Has children — recurse deeper
child_ids = [c.id for c in children]
_collect(child_ids)
+ elif cat.path_model:
+ # Leaf category with path_model — scrape via Search API
+ result.append((cat.path_model, cat.name, cat.trendyol_category_id))
elif cat.trendyol_category_id:
- # Leaf category with trendyol_category_id — add to results
- result.append((cat.trendyol_category_id, cat.name))
+ # Fallback: no path_model but has category_id (legacy)
+ result.append((None, cat.name, cat.trendyol_category_id))
_collect(category_ids)
return result
@@ -1314,28 +1427,28 @@ def scrape_category_data(category_id: int, db: Session = Depends(get_db)):
categories_to_scrape = collect_scrapable_categories(db, sub_ids)
if not categories_to_scrape:
- raise HTTPException(status_code=400, detail="No valid Trendyol IDs found in this category or its subcategories")
+ raise HTTPException(status_code=400, detail="No scrapable categories found (missing path_model/trendyol_category_id)")
- # Start scraping
- results = scrape_multiple_categories(categories_to_scrape, delay=2.0)
+ # Start scraping — convert to legacy format for scrape_multiple_categories
+ # Only categories with trendyol_category_id can use the old API
+ legacy_cats = [(cat_id, name) for (pm, name, cat_id) in categories_to_scrape if cat_id]
+ results = scrape_multiple_categories(legacy_cats, delay=2.0) if legacy_cats else {
+ "successful": 0, "failed": 0, "total_products": 0, "details": []
+ }
# Create snapshots for successful scrapes
for detail in results["details"]:
if detail["success"]:
- pass
- # Find the category in DB
sub_cat = db.query(Category).filter(
Category.trendyol_category_id == detail["category_id"]
).first()
if sub_cat:
- pass
- # Create snapshot
snapshot = Snapshot(
category_id=sub_cat.id,
snapshot_month=datetime.now().strftime("%Y-%m"),
total_products=detail["total_products"],
- avg_price=0, # Calculate from products if needed
+ avg_price=0,
json_file_path=detail["file_path"],
scraped_at=datetime.now()
)
@@ -1416,19 +1529,16 @@ async def create_report(
SYNCHRONOUS: Report only saved when 100% complete
Accepts GET request for EventSource compatibility
"""
- # print(f"🔍 DEBUG - Received request:")
- print(f" - name: {name}")
- print(f" - category_id: {category_id}")
- print(f" - subcategory_ids (raw): {subcategory_ids}")
+ log_api.info(f"Report create request: name={name}, category_id={category_id}, subcategory_ids={subcategory_ids}")
# Parse subcategory_ids if provided
parsed_subcategory_ids = None
if subcategory_ids:
try:
parsed_subcategory_ids = json_module.loads(subcategory_ids)
- print(f" - subcategory_ids (parsed): {parsed_subcategory_ids}")
+ log_api.debug(f"Parsed subcategory_ids: {parsed_subcategory_ids}")
except Exception as e:
- print(f" - ❌ Error parsing subcategory_ids: {e}")
+ log_api.warning(f"Error parsing subcategory_ids: {e}")
parsed_subcategory_ids = None
# Get main category
@@ -1464,7 +1574,7 @@ async def create_report(
categories_to_scrape = collect_scrapable_categories(db, sub_ids)
if not categories_to_scrape:
- raise HTTPException(status_code=400, detail="No valid Trendyol IDs found in this category or its subcategories")
+ raise HTTPException(status_code=400, detail="No scrapable categories found (missing path_model/trendyol_category_id)")
# Generate unique task ID
task_id = str(uuid.uuid4())
@@ -1488,8 +1598,10 @@ async def create_report(
# Stream progress with SSE
async def progress_stream():
"""Generator that yields real-time progress events"""
+ set_correlation_id(task_id)
+ set_report_id(category_id)
+ log_sse.info(f"SSE stream started: task={task_id}, category={main_category.name}")
try:
- pass
# Send initial info
yield f"data: {json_module.dumps({'type': 'info', 'message': f'📂 {main_category.name} kategorisi seçildi', 'progress': 0})}\n\n"
await asyncio.sleep(0.1)
@@ -1501,7 +1613,7 @@ async def create_report(
await asyncio.sleep(0.5)
# Start synchronous scraping with progress updates
- from scraper import TrendyolScraper
+ from scraper import TrendyolSearchScraper, TrendyolScraper
import json
import os
from datetime import datetime
@@ -1515,29 +1627,64 @@ async def create_report(
}
# Scrape each category with real-time updates
- for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1):
+ for idx, (path_model, cat_name, cat_id) in enumerate(categories_to_scrape, 1):
progress = int((idx / len(categories_to_scrape)) * 80) + 10
yield f"data: {json_module.dumps({'type': 'processing', 'message': f'🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...', 'progress': progress, 'current': idx, 'total': len(categories_to_scrape)})}\n\n"
await asyncio.sleep(0.1)
try:
- pass
- # API call notification
- yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Best Seller - Kategori ID: {cat_id}', 'progress': progress})}\n\n"
- await asyncio.sleep(0.1)
+ if path_model:
+ # New Search API — works for both -c and -s categories
+ yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Search - {path_model}', 'progress': progress})}\n\n"
+ await asyncio.sleep(0.1)
- scraper = TrendyolScraper(cat_id)
- products = scraper.fetch_all_products()
+ scraper = TrendyolSearchScraper(path_model)
+ products = await asyncio.get_event_loop().run_in_executor(None, scraper.fetch_all_products)
+
+ # The Search API returns empty socialProofs, so enrich them from the Top Rankings API
+ if products and cat_id and not any(p.get("socialProofs") for p in products):
+ try:
+ top_scraper = TrendyolScraper(cat_id, page_size=20)
+ top_products = await asyncio.get_event_loop().run_in_executor(
+ None, lambda: top_scraper.fetch_all_products(delay=0.5, max_pages=5)
+ )
+ if top_products:
+ # Match socialProofs by product ID
+ social_map = {}
+ for tp in top_products:
+ tid = tp.get("id") or tp.get("contentId")
+ sp = tp.get("socialProofs", [])
+ if tid and sp:
+ social_map[int(tid)] = sp
+ if social_map:
+ for p in products:
+ pid = p.get("id")
+ if pid and int(pid) in social_map:
+ p["socialProofs"] = social_map[int(pid)]
+ log_sse.info(f"Enriched {len(social_map)} products with socialProofs from Top Rankings API")
+ except Exception as e:
+ log_sse.warning(f"Top Rankings socialProofs enrichment failed: {e}")
+
+ elif cat_id:
+ # Legacy fallback — old top-rankings API
+ yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Best Seller - Kategori ID: {cat_id}', 'progress': progress})}\n\n"
+ await asyncio.sleep(0.1)
+
+ scraper = TrendyolScraper(cat_id)
+ products = await asyncio.get_event_loop().run_in_executor(None, scraper.fetch_all_products)
+ else:
+ products = []
if products:
- pass
- # Save to file
+ # Save to file — use cat_id if available, else derive from path_model
os.makedirs(CATEGORIES_DIR, exist_ok=True)
- filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json"
+ file_id = cat_id if cat_id else path_model.replace("/", "_")
+ filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{file_id}.json"
data = {
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"total_products": len(products),
"scraped_at": datetime.now().isoformat(),
@@ -1550,6 +1697,7 @@ async def create_report(
results["total_products"] += len(products)
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": True,
"total_products": len(products),
@@ -1562,6 +1710,7 @@ async def create_report(
results["failed"] += 1
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": False,
"total_products": 0,
@@ -1574,6 +1723,7 @@ async def create_report(
results["failed"] += 1
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": False,
"total_products": 0,
@@ -1585,127 +1735,8 @@ async def create_report(
# Rate limiting (non-blocking)
await asyncio.sleep(1.5)
- # ============================================
- # Sosyal Kanıt Verilerini Topla
- # ============================================
- # print(f"\n🔍 DEBUG: Sosyal kanıt toplama bölümüne ulaşıldı")
- # print(f"🔍 DEBUG: results['details'] uzunluğu: {len(results.get('details', []))}")
-
- yield f"data: {json_module.dumps({'type': 'info', 'message': '📊 Sosyal kanıt verileri toplanıyor...', 'progress': 85})}\n\n"
- await asyncio.sleep(0.5)
-
- # Collect all product IDs from scraped data AND product info
- all_product_ids = []
- product_info_map = {} # Map product_id to product info (name, image, url, category)
- for detail in results["details"]:
- if detail["success"] and detail["file_path"]:
- category_name = detail.get("category_name", "Bilinmeyen Kategori")
- try:
- cat_data = await asyncio.to_thread(_read_json, detail["file_path"])
- products = cat_data.get("products", [])
- # print(f"🔍 DEBUG: {detail['file_path']} dosyasından {len(products)} ürün bulundu")
- for product in products:
- product_id = product.get("id")
- if product_id:
- all_product_ids.append(int(product_id))
- # Extract rating data
- rating_score_obj = product.get("ratingScore", {})
- rating = rating_score_obj.get("averageRating", 0) if isinstance(rating_score_obj, dict) else 0
- rating_count = rating_score_obj.get("totalCount", 0) if isinstance(rating_score_obj, dict) else 0
-
- # Extract barcode from first variant
- barcode = ""
- merchant_listings = product.get("merchantListings", [])
- if merchant_listings and len(merchant_listings) > 0:
- variants = merchant_listings[0].get("variants", [])
- if variants and len(variants) > 0:
- barcode = variants[0].get("barcode", "")
-
- # Store product info with category, brand, price, rating, and barcode
- product_info_map[str(product_id)] = {
- "name": product.get("name", ""),
- "imageUrl": product.get("imageUrl", ""),
- "url": product.get("url", ""),
- "category": category_name,
- "brand": product.get("brand", {}).get("name", "Bilinmeyen Marka"),
- "price": product.get("price", {}).get("sellingPrice", 0),
- "rating": round(rating, 2) if rating else 0,
- "rating_count": rating_count,
- "barcode": barcode,
- "barcode_country": get_country_from_barcode(barcode), # Extract country from barcode prefix
- "origin_country": "Bilinmeyen" # Not available in product data
- }
- except Exception as e:
- pass
- # print(f"⚠️ DEBUG: Dosya okuma hatası {detail['file_path']}: {str(e)}")
- pass
-
- # Collect social proof data in batches
- social_proof_data = {}
- total_products = len(all_product_ids)
- processed = 0
- batch_size = 20
-
- # print(f"🔍 DEBUG: Toplam {total_products} ürün ID'si toplandı")
- # print(f"🔍 DEBUG: İlk 5 ürün ID'si: {all_product_ids[:5] if all_product_ids else 'YOK'}")
-
- if total_products > 0:
- pass
- # print(f"✅ DEBUG: total_products > 0 koşulu sağlandı, sosyal kanıt toplama başlıyor")
- for chunk in _chunked(all_product_ids, batch_size):
- try:
- pass
- # print(f"🔍 DEBUG: {len(chunk)} ürün için sosyal kanıt API'ye istek gönderiliyor: {chunk}")
- data = fetch_social_proof(chunk)
- # print(f"🔍 DEBUG: API yanıtı alındı: {type(data)}, 'result' var mı: {'result' in data if data else False}")
- if data and "result" in data:
- items = data.get("result", [])
- # print(f"🔍 DEBUG: {len(items)} adet sonuç bulundu")
- for item in items:
- pid = item.get("contentId")
- if pid:
- pid_str = str(pid)
- # Get product info from map
- product_info = product_info_map.get(pid_str, {})
- social_proof_data[pid_str] = {
- "page_views": item.get("pageViewCount", 0),
- "orders": item.get("orderCount", 0),
- "baskets": item.get("basketCount", 0),
- "favorites": item.get("favoriteCount", 0),
- "name": product_info.get("name", ""),
- "imageUrl": product_info.get("imageUrl", ""),
- "url": product_info.get("url", ""),
- "category": product_info.get("category", "Bilinmeyen Kategori"),
- "brand": product_info.get("brand", "Bilinmeyen Marka"),
- "price": product_info.get("price", 0),
- "rating": product_info.get("rating", 0),
- "rating_count": product_info.get("rating_count", 0),
- "barcode": product_info.get("barcode", ""),
- "origin_country": product_info.get("origin_country", "Bilinmeyen")
- }
- except Exception as e:
- pass
- # print(f"❌ DEBUG: Sosyal kanıt API hatası: {str(e)}")
- pass
-
- processed += len(chunk)
- progress_pct = int((processed / total_products) * 5) + 85 # 85-90%
- yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 Sosyal kanıt: {processed}/{total_products} ürün', 'progress': progress_pct})}\n\n"
- # SSE keepalive heartbeat every 10 batches
- if processed % (batch_size * 10) == 0:
- yield f": heartbeat\n\n"
- await asyncio.sleep(0.3) # Rate limiting (non-blocking)
-
- # print(f"✅ DEBUG: Sosyal kanıt toplama tamamlandı. Toplanan veri: {len(social_proof_data)} ürün")
- yield f"data: {json_module.dumps({'type': 'success', 'message': f'✅ Sosyal kanıt verileri toplandı ({len(social_proof_data)} ürün)', 'progress': 90})}\n\n"
- await asyncio.sleep(0.3)
- else:
- pass
- # print(f"⚠️ DEBUG: total_products = 0, sosyal kanıt toplanmadı")
- pass
-
# Generate report file
- yield f"data: {json_module.dumps({'type': 'info', 'message': '📝 Rapor dosyası oluşturuluyor...', 'progress': 92})}\n\n"
+ yield f"data: {json_module.dumps({'type': 'info', 'message': '📝 Rapor dosyası oluşturuluyor...', 'progress': 88})}\n\n"
await asyncio.sleep(0.5)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -1727,7 +1758,7 @@ async def create_report(
await asyncio.to_thread(_write_json, json_filename, combined_data)
# Save to database
- yield f"data: {json_module.dumps({'type': 'info', 'message': '💾 Veritabanına kaydediliyor...', 'progress': 95})}\n\n"
+ yield f"data: {json_module.dumps({'type': 'info', 'message': '💾 Veritabanına kaydediliyor...', 'progress': 93})}\n\n"
await asyncio.sleep(0.5)
new_report = Report(
@@ -1742,38 +1773,64 @@ async def create_report(
await asyncio.to_thread(_db_save, db, new_report)
- # Save social proof data to persistent cache
- # print(f"\n🔍 DEBUG: Sosyal kanıt kaydetme bölümü - social_proof_data uzunluğu: {len(social_proof_data)}")
- if social_proof_data:
- enrich_dir = f"{REPORTS_DIR}/enrich_{new_report.id}"
- os.makedirs(enrich_dir, exist_ok=True)
- social_file = f"{enrich_dir}/social.json"
+ # Start enrichment in background thread (survives client disconnect)
+ import threading
+ report_id_for_enrich = new_report.id
+ enrichment_progress[report_id_for_enrich] = {"status": "queued", "step": "queued"}
+ threading.Thread(
+ target=_enrich_report_task,
+ args=(report_id_for_enrich,),
+ daemon=True
+ ).start()
+ log_sse.info(f"Background enrichment started for report {report_id_for_enrich}")
- social_output = {
- "products": len(all_product_ids),
- "total": {
- "page_views": sum(d.get("page_views", 0) for d in social_proof_data.values()),
- "orders": sum(d.get("orders", 0) for d in social_proof_data.values()),
- "baskets": sum(d.get("baskets", 0) for d in social_proof_data.values()),
- "favorites": sum(d.get("favorites", 0) for d in social_proof_data.values())
- },
- "missing": total_products - len(social_proof_data),
- "details": social_proof_data
- }
+ # Wait for enrichment to complete, sending progress updates via SSE
+ yield f"data: {json_module.dumps({'type': 'info', 'message': '📊 Sosyal kanıt verileri toplanıyor...', 'progress': 90})}\n\n"
+ await asyncio.sleep(0.5)
- # print(f"✅ DEBUG: Sosyal kanıt dosyası kaydediliyor: {social_file}")
- # print(f"🔍 DEBUG: Toplam metrikler: {social_output['total']}")
- await asyncio.to_thread(_write_json, social_file, social_output)
- # print(f"✅ DEBUG: Sosyal kanıt dosyası başarıyla kaydedildi")
+ progress_key = f"social_{report_id_for_enrich}"
+ max_wait = 600 # 10 dakika max
+ waited = 0
+ while waited < max_wait:
+ # Check enrichment task status
+ enrich_status = enrichment_progress.get(report_id_for_enrich) or {}
+ if enrich_status.get("status") in ("completed", "error"):
+ break
+
+ # Check social proof progress
+ social_progress = enrichment_progress.get(progress_key) or {}
+ sp_processed = social_progress.get("processed", 0)
+ sp_total = social_progress.get("total", 0)
+ sp_pct = social_progress.get("progress", 0)
+
+ if sp_total > 0:
+ overall_pct = 90 + int(sp_pct * 0.09) # 90-99 arası
+ yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 Sosyal kanıt: {sp_processed}/{sp_total} ürün (%{sp_pct})', 'progress': overall_pct})}\n\n"
+
+ await asyncio.sleep(3)
+ waited += 3
+
+ # Final status check
+ enrich_status = enrichment_progress.get(report_id_for_enrich) or {}
+ if enrich_status.get("status") == "completed":
+ yield f"data: {json_module.dumps({'type': 'info', 'message': '✅ Sosyal kanıt tamamlandı!', 'progress': 99})}\n\n"
+ elif enrich_status.get("status") == "error":
+ err_msg = str(enrich_status.get("error", ""))[:100]
+ yield f"data: {json_module.dumps({'type': 'warning', 'message': f'⚠️ Sosyal kanıt hatası: {err_msg}', 'progress': 99})}\n\n"
else:
- pass
- # print(f"⚠️ DEBUG: social_proof_data boş, dosya kaydedilmedi")
+ yield f"data: {json_module.dumps({'type': 'warning', 'message': '⚠️ Sosyal kanıt zaman aşımı, arka planda devam ediyor...', 'progress': 99})}\n\n"
- # Final success message with report ID
- yield f"data: {json_module.dumps({'type': 'complete', 'message': '✅ Rapor başarıyla oluşturuldu!', 'progress': 100, 'report_id': new_report.id, 'total_products': results['total_products'], 'successful': results['successful']})}\n\n"
await asyncio.sleep(0.1)
+ # Final success message with report ID
+ yield f"data: {json_module.dumps({'type': 'complete', 'message': '✅ Rapor başarıyla oluşturuldu!', 'progress': 100, 'report_id': new_report.id, 'total_products': results['total_products'], 'successful': results['successful'], 'enrichment_status': enrich_status.get('status', 'unknown')})}\n\n"
+ await asyncio.sleep(0.1)
+
+ except asyncio.CancelledError:
+ log_sse.warning(f"SSE stream cancelled (client disconnect): task={task_id}")
+ return
except Exception as e:
+ log_sse.error(f"SSE stream error: task={task_id}, error={e}", exc_info=True)
yield f"data: {json_module.dumps({'type': 'error', 'message': f'❌ Kritik hata: {str(e)}', 'progress': -1})}\n\n"
return StreamingResponse(progress_stream(), media_type="text/event-stream")
@@ -1818,7 +1875,7 @@ def get_scraping_progress(task_id: str):
# Background task for scraping
def scrape_in_background(task_id: str, report_name: str, category_id: int, categories_to_scrape: list, category_name: str):
"""Background task that handles scraping with progress updates"""
- from scraper import TrendyolScraper
+ from scraper import TrendyolSearchScraper, TrendyolScraper
import json
import os
from datetime import datetime
@@ -1858,7 +1915,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ
"details": []
}
- for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1):
+ for idx, (path_model, cat_name, cat_id) in enumerate(categories_to_scrape, 1):
scraping_progress[task_id]["current"] = idx
scraping_progress[task_id]["current_category"] = cat_name
scraping_progress[task_id]["progress"] = int((idx / len(categories_to_scrape)) * 80) + 10
@@ -1866,17 +1923,27 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ
add_log(f"🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...")
try:
- scraper = TrendyolScraper(cat_id)
+ if path_model:
+ scraper = TrendyolSearchScraper(path_model)
+ elif cat_id:
+ scraper = TrendyolScraper(cat_id)
+ else:
+ add_log(f"⚠️ {cat_name} - path_model veya cat_id yok, atlanıyor", "warning")
+ results["failed"] += 1
+ scraping_progress[task_id]["failed"] += 1
+ continue
+
products = scraper.fetch_all_products()
if products:
- pass
# Save to file
os.makedirs(CATEGORIES_DIR, exist_ok=True)
- filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json"
+ file_id = cat_id if cat_id else path_model.replace("/", "_")
+ filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{file_id}.json"
data = {
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"total_products": len(products),
"scraped_at": datetime.now().isoformat(),
@@ -1890,6 +1957,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ
results["total_products"] += len(products)
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": True,
"total_products": len(products),
@@ -1904,6 +1972,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ
results["failed"] += 1
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": False,
"total_products": 0,
@@ -1916,6 +1985,7 @@ def scrape_in_background(task_id: str, report_name: str, category_id: int, categ
results["failed"] += 1
results["details"].append({
"category_id": cat_id,
+ "path_model": path_model,
"category_name": cat_name,
"success": False,
"total_products": 0,
@@ -2038,1114 +2108,30 @@ DASHBOARD_CACHE_TTL = 3600 # 1 hour in seconds
@app.get("/api/reports/{report_id}/dashboard-data")
def get_dashboard_data(report_id: int, db: Session = Depends(get_db)):
"""
- Process report data and return dashboard KPIs and chart data (with caching)
+ Dashboard verisi döndür — konsolide dosya varsa oku, yoksa yerinde oluştur.
"""
- import json
- import os
- from collections import defaultdict
+ from data_consolidator import load_consolidated_report, build_consolidated_report, save_consolidated_report
- # Check cache first
- cache_key = f"dashboard_{report_id}"
- # TEMPORARILY DISABLED FOR DEBUGGING - Re-enable after fixing category sales
- # if cache_key in dashboard_cache:
- # cached_data, cached_time = dashboard_cache[cache_key]
- # if time.time() - cached_time < DASHBOARD_CACHE_TTL:
- # print(f"📊 Cache hit for report {report_id}")
- # return cached_data
- print(f"📊 Cache bypassed for debugging - recalculating dashboard data for report {report_id}")
+ # 1. Konsolide dosyayı oku (hızlı yol)
+ data = load_consolidated_report(report_id, REPORTS_DIR)
+ if data:
+ log_cache.info(f"Konsolide dosyadan yüklendi: report {report_id}")
+ return data
- # Get report from database
+ # 2. Eski raporlar için fallback: yerinde oluştur ve kaydet (lazy migration)
+ log_cache.info(f"Konsolide dosya yok, oluşturuluyor: report {report_id}")
report = db.query(Report).filter(Report.id == report_id).first()
if not report:
raise HTTPException(status_code=404, detail="Report not found")
-
- # Read report JSON file
if not report.json_file_path or not os.path.exists(report.json_file_path):
raise HTTPException(status_code=404, detail="Report data file not found")
- try:
- with open(report.json_file_path, 'r', encoding='utf-8') as f:
- report_data = json.load(f)
- except Exception as e:
- raise HTTPException(status_code=500, detail=f"Error reading report file: {str(e)}")
+ data = build_consolidated_report(report_id, db, REPORTS_DIR)
+ if not data:
+ raise HTTPException(status_code=500, detail="Failed to build consolidated report")
- # Load all product data from category files
- all_products = []
- categories_data = defaultdict(list)
- brands_data = defaultdict(int)
-
- for detail in report_data.get("details", []):
- if detail.get("success") and detail.get("file_path"):
- file_path = detail["file_path"]
- if os.path.exists(file_path):
- try:
- with open(file_path, 'r', encoding='utf-8') as f:
- cat_data = json.load(f)
- products = cat_data.get("products", [])
-
- # Enrich products with category name from report details
- # Clean category name: remove trailing ID pattern (e.g., "Android Cep Telefonu 164461" → "Android Cep Telefonu")
- cat_name_raw = detail["category_name"]
- # Remove trailing space + numbers pattern
- cat_name = re.sub(r'\s+\d+$', '', cat_name_raw)
-
- for product in products:
- # Update category field with actual name
- if isinstance(product.get("category"), dict):
- product["category"]["name"] = cat_name
- else:
- product["category"] = {"id": 0, "name": cat_name}
-
- all_products.extend(products)
-
- # Group by category
- categories_data[cat_name].extend(products)
-
- # Count brands
- for product in products:
- brand_name = product.get("brand", {}).get("name", "Unknown")
- brands_data[brand_name] += 1
- except:
- continue
-
- # Calculate KPIs
- total_products = len(all_products)
- total_brands = len(brands_data)
-
- # Price calculations
- prices = [p.get("price", {}).get("sellingPrice", 0) for p in all_products if p.get("price", {}).get("sellingPrice")]
- avg_price = sum(prices) / len(prices) if prices else 0
- min_price = min(prices) if prices else 0
- max_price = max(prices) if prices else 0
-
- # DISABLED: Discount calculations (not needed per user request)
- # discounted_count = sum(1 for p in all_products if p.get("price", {}).get("discountedPrice") and p.get("price", {}).get("discountedPrice") < p.get("price", {}).get("originalPrice", 0))
- # discount_rate = (discounted_count / total_products * 100) if total_products > 0 else 0
-
- # DISABLED: Stock calculations (not needed per user request)
- # in_stock_count = sum(1 for p in all_products if p.get("inStock", False))
- # out_of_stock_count = total_products - in_stock_count
- # running_out_count = sum(1 for p in all_products if p.get("isRunningOut", False))
-
- # Rating calculations
- ratings = []
- for p in all_products:
- rating = p.get("rating", 0)
- # Handle if rating is a dict (ratingScore)
- if isinstance(rating, dict):
- rating = rating.get("averageRating", 0)
- if rating:
- ratings.append(rating)
- avg_rating = sum(ratings) / len(ratings) if ratings else 0
-
- # DISABLED: Flash products and discount calculations (not needed per user request)
- # flash_count = sum(1 for p in all_products if p.get("isFlash", False))
-
- # Advanced KPIs
- # DISABLED: 1. Discount Depth (average discount percentage for discounted products)
- # discount_depths = []
- # for p in all_products:
- # original = p.get("price", {}).get("originalPrice", 0)
- # discounted = p.get("price", {}).get("discountedPrice", 0)
- # if original > 0 and discounted > 0 and discounted < original:
- # discount_depths.append((original - discounted) / original * 100)
- # avg_discount_depth = sum(discount_depths) / len(discount_depths) if discount_depths else 0
-
- # 2. Median Price (for price premium index calculation) - DOĞRU HESAPLAMA
- median_price = float(np.percentile(prices, 50)) if prices else 0
-
- # DISABLED: 3. Stock Risk Metric (running_out / in_stock ratio) - not needed per user request
- # stock_risk = (running_out_count / in_stock_count * 100) if in_stock_count > 0 else 0
-
- # 4. Low Rating Products Count (rating < 3.0)
- low_rating_count = sum(1 for r in ratings if r < 3.0)
- low_rating_rate = (low_rating_count / len(ratings) * 100) if ratings else 0
-
- # KPIs
- kpis = {
- "total_products": total_products,
- "total_subcategories": report.total_subcategories,
- "total_brands": total_brands,
- "avg_price": round(avg_price, 2),
- "median_price": round(median_price, 2),
- # DISABLED: Discount-related KPIs (not needed per user request)
- # "discounted_products": discounted_count,
- # "discount_rate": round(discount_rate, 2),
- # "avg_discount_depth": round(avg_discount_depth, 2),
- # DISABLED: Stock-related KPIs (not needed per user request)
- # "out_of_stock": out_of_stock_count,
- # "in_stock": in_stock_count,
- # "running_out": running_out_count,
- # "stock_risk": round(stock_risk, 2),
- "avg_rating": round(avg_rating, 2),
- "low_rating_count": low_rating_count,
- "low_rating_rate": round(low_rating_rate, 2),
- # DISABLED: Flash products (not needed per user request)
- # "flash_products": flash_count,
- "min_price": round(min_price, 2),
- "max_price": round(max_price, 2)
- }
-
- # Price distribution (for bar chart)
- price_ranges = {
- "0-100": 0,
- "100-250": 0,
- "250-500": 0,
- "500-1000": 0,
- "1000+": 0
- }
- for price in prices:
- if price < 100:
- price_ranges["0-100"] += 1
- elif price < 250:
- price_ranges["100-250"] += 1
- elif price < 500:
- price_ranges["250-500"] += 1
- elif price < 1000:
- price_ranges["500-1000"] += 1
- else:
- price_ranges["1000+"] += 1
-
- # Top 10 categories by sales (orders from social proof data)
- # First, try to get social proof data to calculate by sales
- category_sales = {}
- try:
- pass
- # Try to get social proof data - check for different batch sizes
- # The social proof cache uses format: {report_id}:b{batch_size}
- # Try common batch sizes: 100, 5 (default), 10, 20
- social_data = None
- for batch_size in [100, 5, 10, 20]:
- social_cache_key = f"{report_id}:b{batch_size}"
- if social_cache_key in social_proof_cache:
- social_data = social_proof_cache.get(social_cache_key)
- if social_data:
- pass
- # print(f"[DEBUG] Found social proof cache with batch_size={batch_size}")
- break
-
- # If not in cache, try loading from persisted JSON
- if not social_data:
- pass
- # print(f"[DEBUG] No social proof cache found, trying persisted JSON")
- persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
- if persisted:
- social_data = {
- "details": persisted.get("details", {}),
- "aggregation": {
- "products": persisted.get("products", 0),
- "total": persisted.get("total", {}),
- "missing": persisted.get("missing", 0)
- }
- }
- # Cache it for future use with batch_size=5 (default)
- social_proof_cache.set(f"{report_id}:b5", social_data)
- # print(f"[DEBUG] Loaded social proof data from JSON with {len(social_data['details'])} products")
- else:
- pass
- # print(f"[DEBUG] No persisted social proof JSON found for report {report_id}")
-
- if social_data:
- social_details = social_data.get("details", {})
- # print(f"[DEBUG] Found social data with {len(social_details)} products")
-
- # Calculate sales per category
- for cat_name, cat_products in categories_data.items():
- total_orders = 0
- for product in cat_products:
- pid = product.get("id")
- if pid and str(pid) in social_details:
- product_orders = social_details[str(pid)].get("orders", 0)
- total_orders += product_orders
- if product_orders > 0:
- pass
- # print(f"[DEBUG] Product {pid} in {cat_name}: {product_orders} orders")
-
- category_sales[cat_name] = {
- "name": cat_name,
- "count": len(cat_products),
- "total_orders": total_orders
- }
- # print(f"[DEBUG] Category {cat_name}: {total_orders} total orders from {len(cat_products)} products")
-
- # Sort by total_orders (sales)
- top_categories = sorted(
- category_sales.values(),
- key=lambda x: x["total_orders"],
- reverse=True
- )[:20]
- # print(f"[DEBUG] Top categories sorted by orders: {[(c['name'], c['total_orders']) for c in top_categories[:3]]}")
- else:
- pass
- # Fallback: If no social proof data, sort by product count
- top_categories = sorted(
- [{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()],
- key=lambda x: x["count"],
- reverse=True
- )[:20]
- except Exception as e:
- pass
- # print(f"[DEBUG] Error calculating category sales: {str(e)}")
- import traceback
- traceback.print_exc()
- # Fallback: If any error, sort by product count
- top_categories = sorted(
- [{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()],
- key=lambda x: x["count"],
- reverse=True
- )[:20]
-
- # Top 10 brands by product count
- top_brands = sorted(
- [{"name": brand, "count": count} for brand, count in brands_data.items()],
- key=lambda x: x["count"],
- reverse=True
- )[:20]
-
- # DISABLED: Stock status distribution (for pie chart) - not needed per user request
- # stock_status = {
- # "in_stock": in_stock_count,
- # "out_of_stock": out_of_stock_count,
- # "running_out": running_out_count
- # }
-
- # Rating distribution
- rating_distribution = {
- "0-1": 0,
- "1-2": 0,
- "2-3": 0,
- "3-4": 0,
- "4-5": 0
- }
- for product in all_products:
- rating = product.get("rating", 0)
- # Handle if rating is a dict (ratingScore)
- if isinstance(rating, dict):
- rating = rating.get("averageRating", 0)
-
- if rating < 1:
- rating_distribution["0-1"] += 1
- elif rating < 2:
- rating_distribution["1-2"] += 1
- elif rating < 3:
- rating_distribution["2-3"] += 1
- elif rating < 4:
- rating_distribution["3-4"] += 1
- else:
- rating_distribution["4-5"] += 1
-
- # Boxplot data (brand price statistics) - Top 10 brands
- brand_price_stats = []
- for brand_name in [b["name"] for b in top_brands[:10]]:
- brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name]
- brand_prices = [p.get("price", {}).get("sellingPrice", 0) for p in brand_products if p.get("price", {}).get("sellingPrice")]
-
- if brand_prices and len(brand_prices) >= 4: # En az 4 veri noktası gerekli
- # DOĞRU İSTATİSTİK: numpy percentile kullanımı
- percentiles = np.percentile(brand_prices, [0, 25, 50, 75, 100])
- brand_price_stats.append({
- "brand": brand_name,
- "min": round(float(percentiles[0]), 2),
- "q1": round(float(percentiles[1]), 2),
- "median": round(float(percentiles[2]), 2),
- "q3": round(float(percentiles[3]), 2),
- "max": round(float(percentiles[4]), 2),
- "count": len(brand_prices)
- })
-
- # Scatter plot data (price vs rating) - Sample 500 products for performance
- scatter_data = []
- sample_size = min(500, len(all_products))
- sampled_products = random.sample(all_products, sample_size)
-
- for p in sampled_products:
- price = p.get("price", {}).get("sellingPrice", 0)
- rating = p.get("rating", 0)
- if isinstance(rating, dict):
- rating = rating.get("averageRating", 0)
-
- if price > 0 and rating > 0:
- scatter_data.append({
- "price": round(price, 2),
- "rating": round(rating, 2),
- "brand": p.get("brand", {}).get("name", "Unknown"),
- "in_stock": p.get("inStock", False)
- })
-
- # Low rating products (rating < 3.0) - Top 20
- low_rating_products = []
- for p in all_products:
- rating = p.get("rating", 0)
- if isinstance(rating, dict):
- rating = rating.get("averageRating", 0)
-
- if rating > 0 and rating < 3.0:
- low_rating_products.append({
- "name": p.get("name", "Unknown")[:50],
- "brand": p.get("brand", {}).get("name", "Unknown"),
- "rating": round(rating, 2),
- "price": round(p.get("price", {}).get("sellingPrice", 0), 2),
- "in_stock": p.get("inStock", False)
- })
-
- low_rating_products = sorted(low_rating_products, key=lambda x: x["rating"])[:20]
-
- # Brand strength score (normalized metrics)
- brand_strength_scores = []
- for brand_name in [b["name"] for b in top_brands[:10]]:
- brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name]
- brand_count = len(brand_products)
- brand_share = (brand_count / total_products * 100) if total_products > 0 else 0
-
- # Brand ratings
- brand_ratings = []
- for p in brand_products:
- rating = p.get("rating", 0)
- if isinstance(rating, dict):
- rating = rating.get("averageRating", 0)
- if rating > 0:
- brand_ratings.append(rating)
- brand_avg_rating = sum(brand_ratings) / len(brand_ratings) if brand_ratings else 0
-
- # Brand stockout rate
- brand_out_of_stock = sum(1 for p in brand_products if not p.get("inStock", False))
- brand_stockout_rate = (brand_out_of_stock / brand_count * 100) if brand_count > 0 else 0
-
- # Simple strength score: share + rating - stockout_rate
- strength_score = brand_share + (brand_avg_rating * 5) - brand_stockout_rate
-
- brand_strength_scores.append({
- "brand": brand_name,
- "share": round(brand_share, 2),
- "avg_rating": round(brand_avg_rating, 2),
- "stockout_rate": round(brand_stockout_rate, 2),
- "strength_score": round(strength_score, 2)
- })
-
- brand_strength_scores = sorted(brand_strength_scores, key=lambda x: x["strength_score"], reverse=True)
-
- # Heatmap: Brand × Category Matrix (top 10 brands × top 10 categories)
- # Get top 10 brands by product count
- top_10_brands = [b["name"] for b in top_brands]
-
- # Get top 10 categories by product count
- top_10_categories = [c["name"] for c in top_categories[:10]]
-
- # Build matrix: count products for each brand-category combination
- heatmap_data = []
- for cat_name in top_10_categories:
- cat_products = categories_data.get(cat_name, [])
- for brand_name in top_10_brands:
- # Count products for this brand-category pair
- count = sum(1 for p in cat_products
- if p.get("brand", {}).get("name") == brand_name)
-
- if count > 0: # Only include non-zero combinations
- heatmap_data.append({
- "brand": brand_name,
- "category": cat_name,
- "value": count
- })
-
- # Anomalies (outlier prices using IQR method) - DOĞRU HESAPLAMA
- if len(prices) > 4:
- q1, q3 = np.percentile(prices, [25, 75])
- iqr = q3 - q1
- lower_bound = q1 - 1.5 * iqr
- upper_bound = q3 + 1.5 * iqr
-
- anomalies = []
- for p in all_products:
- price = p.get("price", {}).get("sellingPrice", 0)
- if price > 0 and (price < lower_bound or price > upper_bound):
- anomalies.append({
- "name": p.get("name", "Unknown")[:50],
- "brand": p.get("brand", {}).get("name", "Unknown"),
- "price": round(price, 2),
- "type": "expensive" if price > upper_bound else "cheap"
- })
-
- anomalies = sorted(anomalies, key=lambda x: x["price"], reverse=True)[:20]
- else:
- anomalies = []
-
- # Category-based Price Analysis (Price Premium/Discount relative to overall average)
- category_price_analysis = []
- overall_avg_price = avg_price # Genel ortalama fiyat
-
- for cat_name, cat_products in categories_data.items():
- # Her kategorinin ürün fiyatlarını topla
- cat_prices = [p.get("price", {}).get("sellingPrice", 0) for p in cat_products
- if p.get("price", {}).get("sellingPrice", 0) > 0]
-
- if cat_prices:
- cat_avg_price = sum(cat_prices) / len(cat_prices)
- cat_median_price = float(np.percentile(cat_prices, 50))
-
- # Fiyat primi hesaplama: (kategori_ort - genel_ort) / genel_ort * 100
- price_premium = ((cat_avg_price - overall_avg_price) / overall_avg_price * 100) if overall_avg_price > 0 else 0
-
- category_price_analysis.append({
- "category": cat_name,
- "avg_price": round(cat_avg_price, 2),
- "median_price": round(cat_median_price, 2),
- "price_premium": round(price_premium, 2),
- "product_count": len(cat_prices),
- "min_price": round(min(cat_prices), 2),
- "max_price": round(max(cat_prices), 2)
- })
-
- # Fiyat primine göre sırala
- category_price_analysis_sorted = sorted(category_price_analysis, key=lambda x: x["price_premium"], reverse=True)
-
- # En pahalı 10 kategori (pozitif prim)
- most_expensive_categories = [c for c in category_price_analysis_sorted if c["price_premium"] > 0][:10]
-
- # En ucuz 10 kategori (negatif prim)
- most_affordable_categories = [c for c in category_price_analysis_sorted if c["price_premium"] < 0][-10:]
- most_affordable_categories.reverse() # En ucuzdan en pahalıya doğru sırala
-
- # ============================================================================
- # MENŞEİ ÜLKE VE BARKOD ANALİZİ
- # ============================================================================
-
- # Ülke kodlarını tam isimlere çeviren mapping
- COUNTRY_NAMES = {
- "TR": "Türkiye",
- "CN": "Çin",
- "US": "Amerika",
- "GB": "İngiltere",
- "FR": "Fransa",
- "DE": "Almanya",
- "IT": "İtalya",
- "ES": "İspanya",
- "KR": "Güney Kore",
- "JP": "Japonya",
- "IN": "Hindistan",
- "TW": "Tayvan",
- "HK": "Hong Kong",
- "TH": "Tayland",
- "VN": "Vietnam",
- "PL": "Polonya",
- "CZ": "Çek Cumhuriyeti",
- "RO": "Romanya",
- "BG": "Bulgaristan",
- "GR": "Yunanistan",
- "PT": "Portekiz",
- "NL": "Hollanda",
- "BE": "Belçika",
- "CH": "İsviçre",
- "AT": "Avusturya",
- "SE": "İsveç",
- "NO": "Norveç",
- "DK": "Danimarka",
- "FI": "Finlandiya",
- "RU": "Rusya",
- "UA": "Ukrayna",
- "AE": "Birleşik Arap Emirlikleri",
- "SA": "Suudi Arabistan",
- "IL": "İsrail",
- "EG": "Mısır",
- "ZA": "Güney Afrika",
- "BR": "Brezilya",
- "MX": "Meksika",
- "CA": "Kanada",
- "AU": "Avustralya",
- "NZ": "Yeni Zelanda",
- "SG": "Singapur",
- "MY": "Malezya",
- "ID": "Endonezya",
- "PH": "Filipinler",
- "PK": "Pakistan",
- "BD": "Bangladeş",
- "AZ": "Azerbaycan",
- }
-
- # Barkod prefix'lerine göre ülke kodu mapping (EAN-13 standardı)
- BARCODE_COUNTRIES = {
- # Trendyol Özel Barkodlar (Harfli)
- "TYB": "Trendyol (İç Barkod)",
- "SGT": "Trendyol Satıcı",
- "KPE": "Trendyol Kampanya",
- "RTN": "Trendyol İade",
- "CDM": "Trendyol Özel",
-
- # EAN-13 Standart Barkodlar
- "00-13": "ABD & Kanada",
- "190-199": "Rezerve/Özel Kullanım",
- "20-29": "Mağaza İçi Kullanım",
- "30-37": "Fransa",
- "380": "Bulgaristan",
- "383": "Slovenya",
- "370": "Litvanya",
- "372": "Estonya",
- "373": "Moldova",
- "375": "Belarus",
- "377": "Ermenistan",
- "379": "Kazakistan",
- "385": "Hırvatistan",
- "387": "Bosna Hersek",
- "400-440": "Almanya",
- "45-49": "Japonya",
- "50": "İngiltere",
- "520-521": "Yunanistan",
- "528": "Lübnan",
- "529": "Kıbrıs",
- "530": "Arnavutluk",
- "531": "Makedonya",
- "535": "Malta",
- "539": "İrlanda",
- "54": "Belçika & Lüksemburg",
- "560": "Portekiz",
- "569": "İzlanda",
- "57": "Danimarka",
- "590": "Polonya",
- "594": "Romanya",
- "599": "Macaristan",
- "600-601": "Güney Afrika",
- "603": "Gana",
- "608": "Bahreyn",
- "609": "Mauritius",
- "611": "Fas",
- "613": "Cezayir",
- "615": "Nijerya",
- "616": "Kenya",
- "618": "Fildişi Sahili",
- "619": "Tunus",
- "621": "Suriye",
- "622": "Mısır",
- "624": "Libya",
- "625": "Ürdün",
- "626": "İran",
- "627": "Kuveyt",
- "628": "Suudi Arabistan",
- "629": "BAE",
- "630": "Katar",
- "631": "Umman",
- "64": "Finlandiya",
- "690-699": "Çin",
- "70": "Norveç",
- "710-719": "Rezerve/Özel Kullanım",
- "729": "İsrail",
- "73": "İsveç",
- "740": "Guatemala",
- "741": "El Salvador",
- "742": "Honduras",
- "743": "Nikaragua",
- "744": "Kosta Rika",
- "745": "Panama",
- "746": "Dominik Cumhuriyeti",
- "750": "Meksika",
- "754-755": "Kanada",
- "759": "Venezuela",
- "76": "İsviçre",
- "770-771": "Kolombiya",
- "773": "Uruguay",
- "775": "Peru",
- "777": "Bolivya",
- "779": "Arjantin",
- "780": "Şili",
- "784": "Paraguay",
- "786": "Ekvador",
- "789-790": "Brezilya",
- "80-83": "İtalya",
- "84": "İspanya",
- "850": "Küba",
- "858": "Slovakya",
- "859": "Çek Cumhuriyeti",
- "860": "Sırbistan",
- "865": "Moğolistan",
- "867": "Kuzey Kore",
- "868-869": "Türkiye",
- "87": "Hollanda",
- "880": "Güney Kore",
- "884": "Kamboçya",
- "885": "Tayland",
- "888": "Singapur",
- "890": "Hindistan",
- "893": "Vietnam",
- "896": "Pakistan",
- "899": "Endonezya",
- "90-91": "Avusturya",
- "93": "Avustralya",
- "94": "Yeni Zelanda",
- "955": "Malezya",
- "958": "Makao",
- "977": "Süreli Yayınlar (ISSN)",
- "978-979": "Kitaplar (ISBN)",
- "980": "Para İade Kuponları",
- "981-984": "Kuponlar",
- "99": "Kuponlar",
- }
-
- # Menşei ülke verilerini topla
- origin_countries = []
- barcodes = []
- products_with_origin = 0
- products_with_barcode = 0
-
- for cat_name, cat_products in categories_data.items():
- for product in cat_products:
- # Menşei ülke bilgisini çıkar
- merchant_listings = product.get("merchantListings", [])
- if merchant_listings and len(merchant_listings) > 0:
- custom_values = merchant_listings[0].get("customValues", [])
- for cv in custom_values:
- if cv.get("key") == "origin":
- country_code = cv.get("value", "").upper()
- if country_code:
- origin_countries.append(country_code)
- products_with_origin += 1
- break
-
- # Barkod bilgisini çıkar
- if merchant_listings and len(merchant_listings) > 0:
- variants = merchant_listings[0].get("variants", [])
- if variants and len(variants) > 0:
- barcode = variants[0].get("barcode", "")
- if barcode:
- barcodes.append(barcode)
- products_with_barcode += 1
-
- # Menşei ülke analizi
- origin_country_counts = {}
- for country_code in origin_countries:
- origin_country_counts[country_code] = origin_country_counts.get(country_code, 0) + 1
-
- # Ülke kodlarını tam isimlere çevir ve sırala
- origin_country_data = []
- for code, count in origin_country_counts.items():
- country_name = COUNTRY_NAMES.get(code, f"Diğer ({code})")
- percentage = (count / products_with_origin * 100) if products_with_origin > 0 else 0
- origin_country_data.append({
- "country_code": code,
- "country_name": country_name,
- "product_count": count,
- "percentage": round(percentage, 2)
- })
-
- origin_country_data_sorted = sorted(origin_country_data, key=lambda x: x["product_count"], reverse=True)
-
- # Barkod prefix analizi (ilk 3 hane)
- barcode_prefixes = {}
- barcode_countries_detected = {}
-
- for barcode in barcodes:
- if len(barcode) >= 3:
- prefix = barcode[:3]
- barcode_prefixes[prefix] = barcode_prefixes.get(prefix, 0) + 1
-
- # Prefix'ten ülke tespiti
- detected_country = "Bilinmiyor"
- prefix_num = barcode[:3]
-
- # Tek prefix kontrolü
- for key, country in BARCODE_COUNTRIES.items():
- if "-" in key:
- start, end = key.split("-")
- # Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp)
- try:
- range_len = len(start)
- prefix_to_check = prefix_num[:range_len] if len(prefix_num) >= range_len else prefix_num
- prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1
- start_int = int(start)
- end_int = int(end)
- if prefix_int >= start_int and prefix_int <= end_int:
- detected_country = country
- break
- except ValueError:
- continue
- elif key == prefix_num[:len(key)]:
- detected_country = country
- break
-
- barcode_countries_detected[detected_country] = barcode_countries_detected.get(detected_country, 0) + 1
-
- # Barkod prefix'lerini sırala
- barcode_prefix_data = []
- for prefix, count in barcode_prefixes.items():
- percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0
-
- # Prefix'ten ülke bul
- detected_country = "Bilinmiyor"
- for key, country in BARCODE_COUNTRIES.items():
- if "-" in key:
- start, end = key.split("-")
- # Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp)
- try:
- range_len = len(start)
- prefix_to_check = prefix[:range_len] if len(prefix) >= range_len else prefix
- prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1
- start_int = int(start)
- end_int = int(end)
- if prefix_int >= start_int and prefix_int <= end_int:
- detected_country = country
- break
- except ValueError:
- continue
- elif key == prefix[:len(key)]:
- detected_country = country
- break
-
- barcode_prefix_data.append({
- "prefix": prefix,
- "detected_country": detected_country,
- "product_count": count,
- "percentage": round(percentage, 2)
- })
-
- barcode_prefix_data_sorted = sorted(barcode_prefix_data, key=lambda x: x["product_count"], reverse=True)[:20]
-
- # Barkoddan tespit edilen ülkeleri sırala
- barcode_country_data = []
- for country, count in barcode_countries_detected.items():
- percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0
- barcode_country_data.append({
- "country_name": country,
- "product_count": count,
- "percentage": round(percentage, 2)
- })
-
- barcode_country_data_sorted = sorted(barcode_country_data, key=lambda x: x["product_count"], reverse=True)
-
- # ============================================================================
- # SATICI ANALİZİ (MERCHANT ANALYSIS)
- # ============================================================================
-
- merchants_data = {} # merchant_id -> {total_products, total_price, winner_count}
- total_winners = 0
- products_with_merchant = 0
-
- for product in all_products:
- merchant_listings = product.get("merchantListings", [])
- if merchant_listings:
- ml = merchant_listings[0] # İlk satıcı
- merchant = ml.get("merchant", {})
- merchant_id = merchant.get("id")
-
- if merchant_id:
- products_with_merchant += 1
-
- # Satıcı verilerini topla
- if merchant_id not in merchants_data:
- pass
- # Satıcı ismini al, boşsa officialName'i kullan, o da boşsa ID'yi kullan
- merchant_name = merchant.get("name") or merchant.get("officialName") or f"Satıcı {merchant_id}"
- merchants_data[merchant_id] = {
- "merchant_id": merchant_id,
- "merchant_name": merchant_name,
- "product_count": 0,
- "total_price": 0,
- "winner_count": 0
- }
-
- merchants_data[merchant_id]["product_count"] += 1
-
- # Fiyat bilgisi
- price = product.get("price", {}).get("sellingPrice", 0)
- if price > 0:
- merchants_data[merchant_id]["total_price"] += price
-
- # Kazanan satıcı mı?
- if ml.get("isWinner"):
- merchants_data[merchant_id]["winner_count"] += 1
- total_winners += 1
-
- # Satıcı listesi oluştur
- merchant_list = []
- for merchant_id, data in merchants_data.items():
- avg_price = data["total_price"] / data["product_count"] if data["product_count"] > 0 else 0
- winner_ratio = (data["winner_count"] / data["product_count"] * 100) if data["product_count"] > 0 else 0
-
- # Satıcı URL'sini oluştur
- merchant_url = f"https://www.trendyol.com/magaza/{data['merchant_name'].lower().replace(' ', '-')}-m-{data['merchant_id']}" if data["merchant_name"] and data["merchant_name"] != f"Satıcı {data['merchant_id']}" else None
-
- merchant_list.append({
- "merchant_id": data["merchant_id"],
- "merchant_name": data["merchant_name"],
- "merchant_url": merchant_url,
- "product_count": data["product_count"],
- "avg_price": round(avg_price, 2),
- "winner_count": data["winner_count"],
- "winner_ratio": round(winner_ratio, 2)
- })
-
- # Ürün sayısına göre sırala
- merchant_list_sorted = sorted(merchant_list, key=lambda x: x["product_count"], reverse=True)
- top_merchants = merchant_list_sorted[:20]
-
- # Genel satıcı istatistikleri
- total_merchants = len(merchants_data)
- winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0
-
- # ============================================================================
- # STOK MİKTAR ANALİZİ (STOCK QUANTITY ANALYSIS)
- # ============================================================================
-
- # DISABLED: Stock quantity analysis (not needed per user request)
- # stock_quantities = []
- # category_stocks = {} # category -> [quantities]
- # products_with_stock_info = 0
- # product_to_category = {} # product_id -> category_name mapping
-
- # # Önce ürün-kategori eşleşmesini oluştur
- # for cat_name, cat_products in categories_data.items():
- # for product in cat_products:
- # product_id = product.get("id")
- # if product_id:
- # product_to_category[product_id] = cat_name
-
- # for product in all_products:
- # merchant_listings = product.get("merchantListings", [])
- # if merchant_listings:
- # ml = merchant_listings[0]
- # variants = ml.get("variants", [])
- # if variants:
- # quantity = variants[0].get("quantity")
- # if quantity is not None and quantity > 0:
- # stock_quantities.append(quantity)
- # products_with_stock_info += 1
-
- # # Kategori bazlı stok - mapping'den al
- # product_id = product.get("id")
- # cat_name = product_to_category.get(product_id, "Diğer")
-
- # if cat_name not in category_stocks:
- # category_stocks[cat_name] = []
- # category_stocks[cat_name].append(quantity)
-
- # # Stok istatistikleri
- # if stock_quantities:
- # avg_stock = sum(stock_quantities) / len(stock_quantities)
- # median_stock = float(np.percentile(stock_quantities, 50))
- # total_stock = sum(stock_quantities)
- # min_stock = min(stock_quantities)
- # max_stock = max(stock_quantities)
- # else:
- # avg_stock = median_stock = total_stock = min_stock = max_stock = 0
-
- # # Kategori bazlı stok analizi
- # category_stock_analysis = []
- # for cat_name, quantities in category_stocks.items():
- # cat_avg_stock = sum(quantities) / len(quantities) if quantities else 0
- # cat_total_stock = sum(quantities)
-
- # category_stock_analysis.append({
- # "category": cat_name,
- # "avg_stock": round(cat_avg_stock, 2),
- # "total_stock": cat_total_stock,
- # "product_count": len(quantities),
- # "min_stock": min(quantities) if quantities else 0,
- # "max_stock": max(quantities) if quantities else 0
- # })
-
- # # Toplam stoka göre sırala
- # category_stock_sorted = sorted(category_stock_analysis, key=lambda x: x["total_stock"], reverse=True)
-
- # # Stok dağılımı (binning)
- # stock_distribution = {
- # "0-100": 0,
- # "101-500": 0,
- # "501-1000": 0,
- # "1001-5000": 0,
- # "5000+": 0
- # }
-
- # for qty in stock_quantities:
- # if qty <= 100:
- # stock_distribution["0-100"] += 1
- # elif qty <= 500:
- # stock_distribution["101-500"] += 1
- # elif qty <= 1000:
- # stock_distribution["501-1000"] += 1
- # elif qty <= 5000:
- # stock_distribution["1001-5000"] += 1
- # else:
- # stock_distribution["5000+"] += 1
-
- # Basitleştirilmiş ürün listesi (sadece fiyat analizi için)
- # Full products data for Overview tab
- full_products = []
-
- for product in all_products:
- price = product.get("price", {}).get("sellingPrice")
- category = product.get("categoryName") or product.get("category")
- brand = product.get("brand", {}).get("name") or product.get("brandName") or "Bilinmeyen"
-
- # Extract category name if it's a dict
- if isinstance(category, dict):
- category_name = category.get("name", "")
- else:
- category_name = category if category else ""
-
- # Social proof data (orders, views, baskets, favorites, etc.) - socialProofs is an array
- social_proofs = product.get("socialProofs", [])
- orders = 0
- page_views = 0
- baskets = 0
- favorites = 0
-
- if isinstance(social_proofs, list):
- for proof in social_proofs:
- proof_type = proof.get("type", "")
- value_str = proof.get("value", "0")
-
- # Parse value (can be string like "208" or "1k")
- try:
- if "k" in value_str.lower():
- parsed_value = int(float(value_str.lower().replace("k", "")) * 1000)
- else:
- parsed_value = int(value_str)
- except:
- parsed_value = 0
-
- # Assign to appropriate field
- if proof_type == "orderCountL3D":
- orders = parsed_value
- elif proof_type == "pageViewCount":
- page_views = parsed_value
- elif proof_type == "basketCount":
- baskets = parsed_value
- elif proof_type == "favoriteCount":
- favorites = parsed_value
-
- # Product image and URL
- images = product.get("images", [])
- image_url = images[0] if isinstance(images, list) and len(images) > 0 else ""
-
- # Trendyol URL
- product_url = product.get("url", "")
- if not product_url:
- content_id = product.get("contentId") or product.get("id")
- if content_id:
- product_url = f"https://www.trendyol.com/p/{content_id}"
-
- # Extract barcode from winnerVariant
- barcode = ""
- winner_variant = product.get("winnerVariant", {})
- if isinstance(winner_variant, dict):
- barcode = winner_variant.get("barcode", "")
-
- # Extract country (origin) from merchantListings
- country_code = ""
- country_name = "Bilinmeyen" # Default value for products without origin data
- merchant_listings_temp = product.get("merchantListings", [])
- if merchant_listings_temp and len(merchant_listings_temp) > 0:
- custom_values = merchant_listings_temp[0].get("customValues", [])
- for cv in custom_values:
- if cv.get("key") == "origin":
- country_code = cv.get("value", "").upper()
- country_name = COUNTRY_NAMES.get(country_code, f"Diğer ({country_code})" if country_code else "Bilinmeyen")
- break
-
- # Extract review count
- review_count = 0
- try:
- review_count = int(product.get("rating_count", 0) or 0)
- except:
- try:
- rating_obj = product.get("rating", {})
- if isinstance(rating_obj, dict):
- review_count = int(rating_obj.get("totalComments", 0) or rating_obj.get("totalCount", 0) or 0)
- except:
- review_count = 0
-
- # Extract rating score
- rating_score = 0.0
- try:
- rating_obj = product.get("rating", {})
- if isinstance(rating_obj, dict):
- rating_score = float(rating_obj.get("averageRating", 0) or rating_obj.get("score", 0) or 0)
- except:
- rating_score = 0.0
-
- if price and category_name:
- full_products.append({
- "id": product.get("contentId") or product.get("id"),
- "name": product.get("name", ""),
- "brand": brand,
- "price": price,
- "category_name": category_name,
- "orders": orders,
- "page_views": page_views,
- "baskets": baskets, # Basket/cart additions
- "favorites": favorites, # Wishlist/favorites count
- "review_count": review_count, # Review/comment count
- "rating": rating_score, # Average rating score (0-5)
- "image_url": image_url if image_url else "https://via.placeholder.com/150",
- "url": product_url,
- "barcode": barcode, # Barcode field added for barcode analysis
- "country_code": country_code, # Country code (TR, CN, DE, etc.)
- "country": country_name # Country name (Türkiye, Çin, Almanya, etc.)
- })
-
- result = {
- "report_id": report_id,
- "report_name": report.name,
- "kpis": kpis,
- "all_products": full_products, # Full product data with social proof, images, URLs
- "charts": {
- "price_distribution": price_ranges,
- "top_categories": top_categories,
- "top_brands": top_brands,
- # DISABLED: "stock_status": stock_status, # Not needed per user request
- "rating_distribution": rating_distribution,
- "brand_price_boxplot": brand_price_stats,
- "price_rating_scatter": scatter_data,
- "brand_strength": brand_strength_scores,
- "brand_category_heatmap": heatmap_data,
- "category_price_premium": {
- "all_categories": category_price_analysis_sorted,
- "most_expensive": most_expensive_categories,
- "most_affordable": most_affordable_categories
- },
- "origin_analysis": {
- "countries": origin_country_data_sorted,
- "top_countries": origin_country_data_sorted[:10],
- "total_products_with_origin": products_with_origin,
- "coverage_percentage": round((products_with_origin / total_products * 100), 2) if total_products > 0 else 0
- },
- "barcode_analysis": {
- "prefixes": barcode_prefix_data_sorted,
- "countries_from_barcode": barcode_country_data_sorted,
- "top_countries_from_barcode": barcode_country_data_sorted[:10],
- "total_products_with_barcode": products_with_barcode,
- "coverage_percentage": round((products_with_barcode / total_products * 100), 2) if total_products > 0 else 0
- },
- "merchant_analysis": {
- "merchants": merchant_list_sorted,
- "top_merchants": top_merchants,
- "total_merchants": total_merchants,
- "total_products_with_merchant": products_with_merchant,
- "total_winners": total_winners,
- "winner_percentage": round(winner_percentage, 2),
- "coverage_percentage": round((products_with_merchant / total_products * 100), 2) if total_products > 0 else 0
- }
- # DISABLED: Stock quantity analysis (not needed per user request)
- # "stock_analysis": {
- # "avg_stock": round(avg_stock, 2),
- # "median_stock": round(median_stock, 2),
- # "total_stock": total_stock,
- # "min_stock": min_stock,
- # "max_stock": max_stock,
- # "products_with_stock_info": products_with_stock_info,
- # "coverage_percentage": round((products_with_stock_info / total_products * 100), 2) if total_products > 0 else 0,
- # "distribution": stock_distribution,
- # "category_stocks": category_stock_sorted,
- # "top_stocked_categories": category_stock_sorted[:10]
- # }
- },
- "insights": {
- "low_rating_products": low_rating_products,
- "anomalies": anomalies
- }
- }
-
- # Cache the result for 1 hour
- dashboard_cache[cache_key] = (result, time.time())
- print(f"📊 Cached dashboard data for report {report_id}")
-
- return result
+ save_consolidated_report(report_id, data, REPORTS_DIR)
+ return data
# ============================================================================
@@ -3401,7 +2387,7 @@ def social_proof(report_id: int, refresh: bool = False, batch_size: int = 5, db:
return result
except Exception as e:
- pass
+ log_api.error(f"Enrichment failed for report: {e}", exc_info=True)
# Mark as failed
enrichment_progress.set(progress_key, {
"status": "failed",
@@ -3468,8 +2454,19 @@ def sales_analytics(report_id: int):
# Return top products by orders
top_by_orders = sorted(enriched_products, key=lambda x: x.get("orders", 0), reverse=True)[:20]
+ # Aggregate totals for sales funnel
+ total_views = sum(p.get("page_views", 0) for p in enriched_products)
+ total_baskets = sum(p.get("baskets", 0) for p in enriched_products)
+ total_orders = sum(p.get("orders", 0) for p in enriched_products)
+
return {
- "top_products_by_orders": top_by_orders
+ "top_products_by_orders": top_by_orders,
+ "total_views": total_views,
+ "total_baskets": total_baskets,
+ "total_orders": total_orders,
+ "view_to_basket_rate": round((total_baskets / total_views * 100), 2) if total_views > 0 else 0,
+ "basket_to_order_rate": round((total_orders / total_baskets * 100), 2) if total_baskets > 0 else 0,
+ "view_to_order_rate": round((total_orders / total_views * 100), 2) if total_views > 0 else 0,
}
except Exception as e:
@@ -3630,26 +2627,24 @@ def keyword_analysis(
Returns:
Keyword analiz sonuçları
"""
- # print(f"🔍 ========== KEYWORD ANALYSIS REQUEST ==========")
- print(f"📋 Report ID: {report_id}")
- print(f"⚙️ Parameters: min_frequency={min_frequency}, min_length={min_length}, word_count={min_word_count}-{max_word_count}, top_n={top_n}, category_filter={category_filter}")
+ log_keywords.info(f"Keyword analysis: report={report_id}, min_freq={min_frequency}, word_count={min_word_count}-{max_word_count}, top_n={top_n}")
try:
- print(f"📦 Ürünler yükleniyor...")
+ log_keywords.info("Ürünler yükleniyor...")
# Load products
all_products, categories_data = load_report_products(db, report_id)
- print(f"✅ {len(all_products) if all_products else 0} ürün yüklendi")
+ log_keywords.info(f"{len(all_products) if all_products else 0} ürün yüklendi")
if not all_products:
- print(f"⚠️ Rapor için ürün bulunamadı!")
+ log_keywords.warning("Rapor için ürün bulunamadı!")
return {"error": "No products found for this report"}
# Load social proof data
- print(f"📊 Social proof data yükleniyor...")
+ log_keywords.info("Social proof data yükleniyor...")
social_json_path = os.path.join(REPORTS_DIR, f"enrich_{report_id}", "social.json")
social_data = _load_json(social_json_path)
social_details = social_data.get("details", {}) if social_data else {}
- print(f"✅ Social proof data yüklendi: {len(social_details)} ürün (path: {social_json_path})")
+ log_keywords.info(f"Social proof data yüklendi: {len(social_details)} ürün")
# Filter by category if specified
if category_filter:
@@ -3659,7 +2654,7 @@ def keyword_analysis(
]
# Step 1: Extract keywords from all product names (OPTIMIZED)
- print(f"🔤 Keyword extraction başlatılıyor... ({len(all_products)} ürün)")
+ log_keywords.info(f"Keyword extraction başlatılıyor... ({len(all_products)} ürün)")
keyword_to_products = {} # {keyword: [product_ids]}
product_keywords_map = {} # {product_id: [keywords]}
@@ -3712,13 +2707,13 @@ def keyword_analysis(
elapsed = time.time() - start_time
rate = processed_count / elapsed if elapsed > 0 else 0
remaining = (len(all_products) - processed_count) / rate if rate > 0 else 0
- print(f"⏳ İşlenen ürün: {processed_count}/{len(all_products)} ({rate:.0f} ürün/sn, ~{remaining:.0f}s kaldı)")
+ log_keywords.info(f"İşlenen ürün: {processed_count}/{len(all_products)} ({rate:.0f} ürün/sn, ~{remaining:.0f}s kaldı)")
elapsed_total = time.time() - start_time
- print(f"✅ Keyword extraction tamamlandı: {len(keyword_to_products)} unique keyword bulundu ({elapsed_total:.2f}s)")
+ log_keywords.info(f"Keyword extraction tamamlandı: {len(keyword_to_products)} unique keyword ({elapsed_total:.2f}s)")
# Step 2: Separate rare keywords (frequency 1-2) and common keywords (>= min_frequency)
- print(f"🔍 Keyword ayrıştırma: rare (1-2) vs common (>={min_frequency})")
+ log_keywords.info(f"Keyword ayrıştırma: rare (1-2) vs common (>={min_frequency})")
rare_keywords = {
kw: product_ids
for kw, product_ids in keyword_to_products.items()
@@ -3729,10 +2724,10 @@ def keyword_analysis(
for kw, product_ids in keyword_to_products.items()
if len(product_ids) >= min_frequency
}
- print(f"✅ Rare keywords: {len(rare_keywords)} | Common keywords: {len(filtered_keywords)}")
+ log_keywords.info(f"Rare keywords: {len(rare_keywords)} | Common keywords: {len(filtered_keywords)}")
# Step 3: Calculate metrics for each keyword (OPTIMIZED)
- print(f"📊 Metrikler hesaplanıyor... ({len(filtered_keywords)} keyword)")
+ log_keywords.info(f"Metrikler hesaplanıyor... ({len(filtered_keywords)} keyword)")
keyword_metrics = []
# Create product lookup dict for faster access
@@ -3823,8 +2818,8 @@ def keyword_analysis(
"views": views,
"orders": orders,
"reviews": review_count,
- "price": product.get("price", {}).get("sellingPrice", 0) if isinstance(product.get("price"), dict) else 0,
- "image_url": product.get("images", [])[0] if product.get("images") else "https://via.placeholder.com/150",
+ "price": _extract_price(product),
+ "image_url": product.get("imageUrl", "") or (product.get("images", [])[0] if product.get("images") else "https://via.placeholder.com/150"),
"url": product.get("url", "") or f"https://www.trendyol.com/p/{pid}"
})
@@ -3893,10 +2888,10 @@ def keyword_analysis(
elapsed_metric = time.time() - metric_start_time
rate = metric_count / elapsed_metric if elapsed_metric > 0 else 0
remaining = (len(filtered_keywords) - metric_count) / rate if rate > 0 else 0
- print(f"⏳ İşlenen keyword: {metric_count}/{len(filtered_keywords)} ({rate:.1f} keyword/sn, ~{remaining:.0f}s kaldı)")
+ log_keywords.info(f"İşlenen keyword: {metric_count}/{len(filtered_keywords)} ({rate:.1f} keyword/sn, ~{remaining:.0f}s kaldı)")
metric_elapsed = time.time() - metric_start_time
- print(f"✅ Metrikler hesaplandı: {len(keyword_metrics)} keyword ({metric_elapsed:.2f}s)")
+ log_keywords.info(f"Metrikler hesaplandı: {len(keyword_metrics)} keyword ({metric_elapsed:.2f}s)")
# Step 4: Apply advanced filters
# print(f"🔍 Gelişmiş filtreler uygulanıyor...")
@@ -3968,10 +2963,10 @@ def keyword_analysis(
kw["potential_score"] = round(potential, 2)
filtered_metrics = [kw for kw in filtered_metrics if kw.get("potential_score", 0) >= min_potential_score]
- print(f"✅ Filtreleme sonrası: {len(filtered_metrics)} keyword kaldı")
-
+ log_keywords.info(f"Filtreleme sonrası: {len(filtered_metrics)} keyword kaldı")
+
# Step 5: Sort by selected criteria
- print(f"📈 Sıralama yapılıyor: {sort_by} ({sort_order})...")
+ log_keywords.info(f"Sıralama yapılıyor: {sort_by} ({sort_order})...")
reverse_order = sort_order == "desc"
if sort_by == "frequency":
@@ -4005,10 +3000,10 @@ def keyword_analysis(
# Get paginated keywords
paginated_keywords = filtered_metrics[start_index:end_index]
- print(f"✅ Sayfa {page}/{total_pages} - {len(paginated_keywords)} keyword seçildi (toplam: {total_keywords})")
+ log_keywords.info(f"Sayfa {page}/{total_pages} - {len(paginated_keywords)} keyword (toplam: {total_keywords})")
# Step 6: Process rare keywords (frequency 1-2) - Limited to top 100 for performance
- print(f"📊 Rare keywords işleniyor... ({len(rare_keywords)} keyword)")
+ log_keywords.info(f"Rare keywords işleniyor... ({len(rare_keywords)} keyword)")
rare_metrics = []
rare_count = 0
for keyword, product_ids in rare_keywords.items():
@@ -4047,7 +3042,7 @@ def keyword_analysis(
# Sort rare keywords by orders (most promising first)
rare_metrics.sort(key=lambda x: x["performance"]["total_orders"], reverse=True)
- print(f"✅ Rare keywords işlendi: {len(rare_metrics)} keyword (top 100)")
+ log_keywords.info(f"Rare keywords işlendi: {len(rare_metrics)} keyword (top 100)")
# Step 7: Build category × keyword matrix
category_keyword_matrix = {}
@@ -4087,17 +3082,14 @@ def keyword_analysis(
}
}
- print(f"✅ ========== KEYWORD ANALYSIS COMPLETED ==========")
- print(f"📊 Sonuç: {result['total_keywords']} common keywords, {result['total_rare_keywords']} rare keywords, {result['total_products_analyzed']} ürün")
- print(f"📄 Sayfa {page}/{total_pages} - {len(result['keywords'])} keyword gösteriliyor, {len(result['rare_keywords'])} rare keyword")
+ log_keywords.info(f"Keyword analysis completed: {result['total_keywords']} common, {result['total_rare_keywords']} rare, {result['total_products_analyzed']} ürün")
return result
except Exception as e:
import traceback
error_trace = traceback.format_exc()
- print(f"❌ Keyword analysis error: {str(e)}")
- print(f"Traceback: {error_trace}")
+ log_keywords.error(f"Keyword analysis error: {e}", exc_info=True)
return {"error": str(e), "traceback": error_trace, "note": "Failed to generate keyword analysis"}
@@ -4149,15 +3141,14 @@ def product_finder(
Returns:
Filtrelenmiş ürün listesi
"""
- # print(f"🔍 ========== PRODUCT FINDER REQUEST ==========")
- print(f"📋 Report ID: {report_id}, Page: {page}, Per Page: {per_page}")
+ log_api.info(f"Product finder: report={report_id}, page={page}, per_page={per_page}")
try:
pass
# Load products
all_products, categories_data = load_report_products(db, report_id)
- print(f"✅ {len(all_products)} ürün yüklendi")
-
+ log_api.info(f"{len(all_products)} ürün yüklendi")
+
if not all_products:
return {
"total_products": 0,
@@ -4166,11 +3157,11 @@ def product_finder(
"total_pages": 0,
"products": []
}
-
+
# Load social proof data
social_data = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
social_details = social_data.get("details", {}) if social_data else {}
- print(f"✅ Social proof data yüklendi: {len(social_details)} ürün")
+ log_api.info(f"Social proof data yüklendi: {len(social_details)} ürün")
# Create product lookup dict
product_dict = {p.get("id"): p for p in all_products if p.get("id")}
@@ -4225,13 +3216,7 @@ def product_finder(
rating = float(rating_obj)
# Get price
- price = 0
- if product.get("price"):
- price_obj = product.get("price")
- if isinstance(price_obj, dict):
- price = float(price_obj.get("sellingPrice", 0) or 0)
- elif isinstance(price_obj, (int, float)):
- price = float(price_obj)
+ price = _extract_price(product)
# Get category
category = product.get("category", {})
@@ -4362,7 +3347,7 @@ def product_finder(
"barcode": product.get("barcode", "")
})
- print(f"✅ Filtreleme sonrası: {len(filtered_products)} ürün kaldı")
+ log_api.info(f"Filtreleme sonrası: {len(filtered_products)} ürün kaldı")
# Sort products
reverse_order = sort_order == "desc"
@@ -4386,7 +3371,7 @@ def product_finder(
end_idx = start_idx + per_page
paginated_products = filtered_products[start_idx:end_idx]
- print(f"✅ Sayfalama: {len(paginated_products)} ürün gösteriliyor (sayfa {page}/{total_pages})")
+ log_api.info(f"Sayfalama: {len(paginated_products)} ürün (sayfa {page}/{total_pages})")
return {
"total_products": total_products,
@@ -4399,8 +3384,7 @@ def product_finder(
except Exception as e:
import traceback
error_trace = traceback.format_exc()
- print(f"❌ Product finder error: {str(e)}")
- print(f"Traceback: {error_trace}")
+ log_api.error(f"Product finder error: {e}", exc_info=True)
return {
"error": str(e),
"total_products": 0,
@@ -4615,6 +3599,17 @@ def _enrich_report_task(report_id: int):
_save_json(f"{base_dir}/social.json", soc_payload)
time.sleep(0.1)
+ # Enrichment bitti, konsolide dosya oluştur
+ enrichment_progress[report_id] = {"status": "running", "step": "consolidate", "done": 1, "total": 2}
+ try:
+ from data_consolidator import build_consolidated_report, save_consolidated_report
+ consolidated = build_consolidated_report(report_id, db, REPORTS_DIR, social_data=soc_payload)
+ if consolidated:
+ save_consolidated_report(report_id, consolidated, REPORTS_DIR)
+ log_api.info(f"Konsolide rapor oluşturuldu: report {report_id}")
+ except Exception as ce:
+ log_api.warning(f"Konsolidasyon hatası (enrichment devam eder): {ce}", exc_info=True)
+
# DISABLED: Questions, similar products, and followers removed per user request
# # 3) Questions
# enrichment_progress[report_id] = {"status": "running", "step": "questions", "done": 2, "total": 5}
@@ -4634,6 +3629,14 @@ def _enrich_report_task(report_id: int):
# _save_json(f"{base_dir}/followers.json", f_payload)
# time.sleep(0.1)
+ # Invalidate dashboard cache so next request gets fresh data with social proof
+ cache_key = f"dashboard_{report_id}"
+ if isinstance(dashboard_cache, dict) and cache_key in dashboard_cache:
+ del dashboard_cache[cache_key]
+ elif hasattr(dashboard_cache, 'cache') and cache_key in dashboard_cache.cache:
+ del dashboard_cache.cache[cache_key]
+ log_api.info(f"Dashboard cache invalidated for report {report_id} after enrichment")
+
enrichment_progress[report_id] = {"status": "completed", "step": "done", "done": 2, "total": 2}
except Exception as e:
enrichment_progress[report_id] = {"status": "error", "error": str(e)}
@@ -4650,7 +3653,8 @@ def start_enrichment(report_id: int, background: BackgroundTasks):
@app.get("/api/reports/{report_id}/enrich/status")
def enrichment_status(report_id: int):
- return enrichment_progress.get(report_id, {"status": "unknown"})
+ result = enrichment_progress.get(report_id)
+ return result if result is not None else {"status": "unknown"}
# ============================================================================
@@ -4663,8 +3667,8 @@ def get_hidden_champions(
min_rating: float = 4.5,
max_review_count: int = 50,
social_multiplier: float = 1.5,
- min_score: int = 60,
- min_orders: int = 1, # Minimum satış sayısı (satış verisi çok önemli)
+ min_score: int = 30,
+ min_orders: int = 0, # Minimum satış sayısı (0 = sosyal veri yoksa da göster)
limit: int = 50,
db: Session = Depends(get_db)
):
@@ -4772,11 +3776,7 @@ def test_analytics(report_id: int, db: Session = Depends(get_db)):
avg_rating = sum(ratings) / len(ratings) if ratings else 0
# Ortalama fiyat
- prices = [
- p.get("price", {}).get("sellingPrice", 0)
- for p in all_products
- if p.get("price", {}).get("sellingPrice", 0) > 0
- ]
+ prices = [_extract_price(p) for p in all_products if _extract_price(p) > 0]
avg_price = sum(prices) / len(prices) if prices else 0
# 8. HHI yorumu ve stratejik tavsiye
@@ -4964,6 +3964,34 @@ async def test_google_trends(product_name: str = "iPhone 15"):
raise HTTPException(status_code=500, detail=str(e))
+# ---------------------------------------------------------------------------
+# Periodic resource logger (runs every 60s in background)
+# ---------------------------------------------------------------------------
+_resource_logger = get_logger("resources")
+
+async def _periodic_resource_log():
+    """Log the dashboard cache size and circuit-breaker state every 60 seconds.
+
+    Runs forever and is meant to be scheduled as a background task. A
+    failure while collecting or emitting a single snapshot is logged and
+    the loop carries on with the next interval. Only ``Exception``
+    subclasses are caught here.
+    """
+    while True:
+        await asyncio.sleep(60)
+        try:
+            cb_status = _social_proof_breaker.get_status()
+            # dashboard_cache is either a plain dict or an object that
+            # exposes its entries through a ``.cache`` container.
+            if isinstance(dashboard_cache, dict):
+                cache_size = len(dashboard_cache)
+            else:
+                cache_size = len(dashboard_cache.cache)
+            _resource_logger.info(
+                "Resource snapshot",
+                extra={
+                    "cache_size": cache_size,
+                    "cb_state": cb_status["status"],
+                    "failures": cb_status["failures"],
+                },
+            )
+        except Exception:
+            # Never crash the background task, but leave a trace so that a
+            # persistently failing snapshot does not go unnoticed.
+            _resource_logger.warning("Resource snapshot failed", exc_info=True)
+
+@app.on_event("startup")
+async def _start_resource_logger():
+    """Schedule the periodic resource logger when the application starts."""
+    global _resource_logger_task
+    # The event loop only keeps weak references to tasks, so hold a strong
+    # module-level reference; otherwise the never-ending task could be
+    # garbage-collected while it is still running.
+    _resource_logger_task = asyncio.create_task(_periodic_resource_log())
+    _resource_logger.info("Periodic resource logger started (60s interval)")
+
+
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8001)
diff --git a/backend/scraper.py b/backend/scraper.py
index a25257e..3e0427c 100644
--- a/backend/scraper.py
+++ b/backend/scraper.py
@@ -10,6 +10,9 @@ import math
import os
from typing import Dict, List, Any, Optional
from datetime import datetime
+from logging_config import get_logger
+
+log = get_logger("scraper")
class TrendyolScraper:
@@ -55,7 +58,7 @@ class TrendyolScraper:
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
- print(f"❌ Sayfa {page} error: {e}")
+ log.warning(f"Sayfa {page} error: {e}")
return None
def get_total_count(self) -> int:
@@ -96,7 +99,7 @@ class TrendyolScraper:
# Sayfa sayısını hesapla
total_pages = self.calculate_total_pages(total_count, max_pages)
- print(f"📦 Kategori {self.category_id}: {total_count} ürün, {total_pages} sayfa çekilecek")
+ log.info(f"Kategori {self.category_id}: {total_count} ürün, {total_pages} sayfa çekilecek")
# Sayfaları çek
all_products = []
@@ -105,7 +108,7 @@ class TrendyolScraper:
data = self.fetch_page(page)
if not data or not data.get('isSuccess'):
- print(f"⚠️ Sayfa {page} atlandı")
+ log.warning(f"Sayfa {page} atlandı")
continue
products = data.get('products', [])
@@ -144,7 +147,7 @@ class TrendyolScraper:
return True
except Exception as e:
- print(f"❌ Dosya kaydetme hatası: {e}")
+ log.error(f"Dosya kaydetme hatası: {e}")
return False
def get_category_info(self) -> Optional[Dict[str, Any]]:
@@ -157,6 +160,112 @@ class TrendyolScraper:
return data.get('categoryInfo', {})
+class TrendyolSearchScraper:
+    """Fetch products through the Trendyol Search API.
+
+    Works for every category path type (both ``-c`` and ``-s`` paths).
+    """
+
+    API_BASE_URL = "https://apigw.trendyol.com/discovery-sfint-search-service/api/search/products"
+
+    def __init__(self, path_model: str, page_size: int = 24):
+        """Remember the category path and page size and build request headers/cookies.
+
+        Args:
+            path_model: Category path sent as ``pathModel`` and used in the Referer header.
+            page_size: Number of products requested per page (sent as ``ps``).
+        """
+        self.path_model = path_model
+        self.page_size = page_size
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
+            "Accept": "application/json",
+            "Referer": f"https://www.trendyol.com/{path_model}",
+            "Origin": "https://www.trendyol.com"
+        }
+        self.cookies = {
+            "storefrontId": "1",
+            "language": "tr",
+            "countryCode": "TR"
+        }
+
+    def fetch_page(self, page: int) -> Optional[Dict[str, Any]]:
+        """Fetch a single result page.
+
+        Args:
+            page: 1-based page index (sent as ``pi``).
+
+        Returns:
+            The decoded JSON payload, or ``None`` when the request fails or
+            the body cannot be decoded as JSON.
+        """
+        params = {
+            "pathModel": self.path_model,
+            "pi": page,
+            "ps": self.page_size,
+            "channelId": 1,
+            "storefrontId": 1,
+            "culture": "tr-TR"
+        }
+        try:
+            response = requests.get(
+                self.API_BASE_URL,
+                params=params,
+                headers=self.headers,
+                cookies=self.cookies,
+                timeout=15
+            )
+            response.raise_for_status()
+            return response.json()
+        # ValueError: older requests releases raise a plain ValueError from
+        # response.json() when the body is not valid JSON.
+        except (requests.exceptions.RequestException, ValueError) as e:
+            log.warning(f"Search API sayfa {page} error ({self.path_model}): {e}")
+            return None
+
+    def fetch_all_products(self, delay: float = 1.0, max_pages: int = 10) -> List[Dict[str, Any]]:
+        """Fetch up to ``max_pages`` pages and return the normalized products.
+
+        With the defaults (10 pages x 24 products) at most 240 products are
+        returned.
+
+        Args:
+            delay: Seconds to wait between two consecutive page requests.
+            max_pages: Upper bound on the number of pages requested.
+
+        Returns:
+            Every collected product passed through
+            ``_normalize_search_product``. An empty list when the first page
+            cannot be fetched, or when it reports neither a total nor any
+            products.
+        """
+        first = self.fetch_page(1)
+        if not first:
+            return []
+
+        # The trailing ``or 0`` covers a payload whose counters are all null.
+        total = first.get("total", 0) or first.get("totalCount", 0) or first.get("roughTotal", 0) or 0
+        # Copy so the response payload is not mutated by extend();
+        # ``or []`` covers an explicit null "products" value.
+        raw_products = list(first.get("products") or [])
+
+        if total == 0 and not raw_products:
+            return []
+
+        # Even when the reported total is 0, keep the products that were returned
+        if total == 0 and raw_products:
+            total = len(raw_products)
+
+        total_pages = min(math.ceil(total / self.page_size), max_pages)
+        log.info(f"Search API {self.path_model}: {total} ürün, {total_pages} sayfa çekilecek")
+
+        for page in range(2, total_pages + 1):
+            # Throttle before every follow-up request, including the one
+            # that directly follows page 1; nothing to wait for after the
+            # last page.
+            time.sleep(delay)
+            data = self.fetch_page(page)
+            if data and data.get("products"):
+                raw_products.extend(data["products"])
+
+        return [_normalize_search_product(p) for p in raw_products]
+
+
+def _normalize_search_product(raw: dict) -> dict:
+    """Convert a Search API product into the product shape used by the existing system.
+
+    ``raw`` is not modified: when a ``sellingPrice`` key has to be added,
+    it is added to a copy of the price mapping.
+
+    Args:
+        raw: One product entry as returned by the Search API.
+
+    Returns:
+        A dict with the keys ``id``, ``name``, ``brand``, ``price``,
+        ``ratingScore``, ``url``, ``imageUrl``, ``merchantListings``,
+        ``winnerVariant``, ``socialProofs``, ``categoryId`` and
+        ``categoryName``.
+    """
+    brand = raw.get("brand", {})
+    if brand is None:
+        # An explicit null is treated like a missing brand.
+        brand = {}
+    elif isinstance(brand, str):
+        brand = {"name": brand}
+
+    price = raw.get("price", {})
+    if price is None:
+        # An explicit null is treated like a missing price.
+        price = {}
+    if isinstance(price, (int, float)):
+        price = {"sellingPrice": price, "originalPrice": price}
+    elif isinstance(price, dict) and "sellingPrice" not in price:
+        # Search API returns current/discountedPrice/originalPrice — map to sellingPrice.
+        # Build a new dict instead of writing into the caller's data.
+        price = {
+            **price,
+            "sellingPrice": (
+                price.get("discountedPrice")
+                or price.get("current")
+                or price.get("originalPrice")
+                or price.get("old")
+                or 0
+            ),
+        }
+
+    rating = raw.get("ratingScore", {})
+    if rating is None:
+        rating = {}
+
+    return {
+        "id": raw.get("id") or raw.get("contentId"),
+        "name": raw.get("name", ""),
+        "brand": brand,
+        "price": price,
+        "ratingScore": rating,
+        "url": raw.get("url", ""),
+        "imageUrl": raw.get("image", raw.get("imageUrl", "")),
+        "merchantListings": raw.get("merchantListings", []),
+        "winnerVariant": raw.get("winnerVariant", {}),
+        "socialProofs": raw.get("socialProofs", []),
+        "categoryId": raw.get("categoryId"),
+        "categoryName": raw.get("categoryName"),
+    }
+
+
def scrape_category(category_id: int, category_name: str, output_dir: str = "../categories") -> Dict[str, Any]:
"""
Tek bir kategoriyi çeker
@@ -227,9 +336,7 @@ def scrape_multiple_categories(categories: List[tuple], delay: float = 2.0) -> D
}
for i, (cat_id, cat_name) in enumerate(categories, 1):
- print(f"\n{'='*80}")
- print(f"📂 [{i}/{len(categories)}] {cat_name} (ID: {cat_id})")
- print('='*80)
+ log.info(f"[{i}/{len(categories)}] {cat_name} (ID: {cat_id})")
result = scrape_category(cat_id, cat_name)
results["details"].append(result)
@@ -237,10 +344,10 @@ def scrape_multiple_categories(categories: List[tuple], delay: float = 2.0) -> D
if result["success"]:
results["successful"] += 1
results["total_products"] += result["total_products"]
- print(f"✅ Başarılı: {result['total_products']} ürün")
+ log.info(f"Başarılı: {result['total_products']} ürün")
else:
results["failed"] += 1
- print(f"❌ Hata: {result['error']}")
+ log.error(f"Hata: {result['error']}")
# Kategoriler arası bekleme
if i < len(categories):