mirror of
https://github.com/nethunterzist/trendyol-analiz
synced 2026-07-01 09:27:03 +00:00
Ne yaptık: - build_consolidated_report(): normalized_products boş olunca None yerine empty=true içeren boş dashboard döndürüyor - normalize_product(): seller_count, has_buybox, is_trendyol_product alanları eklendi - Social proof: metrik bazında en doğru kaynak seçimi (inline vs enrichment API) Neden yaptık: - Boş kategorilerde dashboard-data endpoint 500 fırlatıyordu, kullanıcı loading skeleton'dan çıkamıyordu - Satıcı sayısı ve buybox bilgisi Ürünler tablosu için gerekli Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
823 lines
35 KiB
Python
823 lines
35 KiB
Python
"""
|
||
Data Consolidator — tek birleştirilmiş JSON oluşturma modülü.
|
||
|
||
Scraping + enrichment bittiğinde tüm normalizasyon ve hesaplamayı yapar,
|
||
sonucu reports/report_{id}_data.json olarak kaydeder.
|
||
Dashboard endpoint sadece bu dosyayı okur.
|
||
"""
|
||
import json
|
||
import os
|
||
import re
|
||
import time
|
||
import random
|
||
from collections import defaultdict
|
||
from datetime import datetime
|
||
|
||
import numpy as np
|
||
|
||
from logging_config import get_logger
|
||
|
||
log = get_logger("consolidator")
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# Ülke kodu → tam isim mapping (menşei analizi için)
|
||
# ─────────────────────────────────────────────────────────
|
||
# ISO-style two-letter country code → Turkish display name.
# Used to label product origin codes; codes missing here are rendered by the
# callers as "Diğer (<code>)".
COUNTRY_NAMES = {
    "TR": "Türkiye",
    "CN": "Çin",
    "US": "Amerika",
    "GB": "İngiltere",
    "FR": "Fransa",
    "DE": "Almanya",
    "IT": "İtalya",
    "ES": "İspanya",
    "KR": "Güney Kore",
    "JP": "Japonya",
    "IN": "Hindistan",
    "TW": "Tayvan",
    "HK": "Hong Kong",
    "TH": "Tayland",
    "VN": "Vietnam",
    "PL": "Polonya",
    "CZ": "Çek Cumhuriyeti",
    "RO": "Romanya",
    "BG": "Bulgaristan",
    "GR": "Yunanistan",
    "PT": "Portekiz",
    "NL": "Hollanda",
    "BE": "Belçika",
    "CH": "İsviçre",
    "AT": "Avusturya",
    "SE": "İsveç",
    "NO": "Norveç",
    "DK": "Danimarka",
    "FI": "Finlandiya",
    "RU": "Rusya",
    "UA": "Ukrayna",
    "AE": "Birleşik Arap Emirlikleri",
    "SA": "Suudi Arabistan",
    "IL": "İsrail",
    "EG": "Mısır",
    "ZA": "Güney Afrika",
    "BR": "Brezilya",
    "MX": "Meksika",
    "CA": "Kanada",
    "AU": "Avustralya",
    "NZ": "Yeni Zelanda",
    "SG": "Singapur",
    "MY": "Malezya",
    "ID": "Endonezya",
    "PH": "Filipinler",
    "PK": "Pakistan",
    "BD": "Bangladeş",
    "AZ": "Azerbaycan",
}
|
||
|
||
# Barkod prefix → ülke (EAN-13)
|
||
# Barcode prefix → label shown in the barcode analysis (Turkish display names).
# A key is either an exact prefix ("380") or an inclusive numeric range
# ("400-440"). The lookup helper in this module walks the dict in insertion
# order and returns the first match, so entry order is significant.
BARCODE_COUNTRIES = {
    # Alphabetic prefixes used by Trendyol itself
    "TYB": "Trendyol (İç Barkod)", "SGT": "Trendyol Satıcı",
    "KPE": "Trendyol Kampanya", "RTN": "Trendyol İade", "CDM": "Trendyol Özel",
    # Numeric GS1 (EAN-13) prefixes
    "00-13": "ABD & Kanada", "190-199": "Rezerve/Özel Kullanım",
    "20-29": "Mağaza İçi Kullanım", "30-37": "Fransa",
    # NOTE(review): the 370-379 entries below can never match because the
    # "30-37" range above already covers them; kept so no key disappears.
    "380": "Bulgaristan", "383": "Slovenya", "370": "Litvanya",
    "372": "Estonya", "373": "Moldova", "375": "Belarus",
    "377": "Ermenistan", "379": "Kazakistan", "385": "Hırvatistan",
    "387": "Bosna Hersek", "400-440": "Almanya",
    # 45x and 49x are Japan; 46x-48x belong to other GS1 member organisations
    # (previously the whole 45-49 span was reported as Japan).
    "450-459": "Japonya", "460-469": "Rusya", "470": "Kırgızistan",
    "471": "Tayvan", "474": "Estonya", "475": "Letonya",
    "476": "Azerbaycan", "477": "Litvanya", "478": "Özbekistan",
    "479": "Sri Lanka", "480": "Filipinler", "481": "Belarus",
    "482": "Ukrayna", "483": "Türkmenistan", "484": "Moldova",
    "485": "Ermenistan", "486": "Gürcistan", "487": "Kazakistan",
    "488": "Tacikistan", "489": "Hong Kong", "490-499": "Japonya",
    "50": "İngiltere", "520-521": "Yunanistan", "528": "Lübnan",
    "529": "Kıbrıs", "530": "Arnavutluk", "531": "Makedonya",
    "535": "Malta", "539": "İrlanda", "54": "Belçika & Lüksemburg",
    "560": "Portekiz", "569": "İzlanda", "57": "Danimarka",
    "590": "Polonya", "594": "Romanya", "599": "Macaristan",
    "600-601": "Güney Afrika", "603": "Gana", "608": "Bahreyn",
    "609": "Mauritius", "611": "Fas", "613": "Cezayir",
    "615": "Nijerya", "616": "Kenya", "618": "Fildişi Sahili",
    "619": "Tunus", "621": "Suriye", "622": "Mısır",
    "624": "Libya", "625": "Ürdün", "626": "İran",
    "627": "Kuveyt", "628": "Suudi Arabistan", "629": "BAE",
    "630": "Katar", "631": "Umman", "64": "Finlandiya",
    "690-699": "Çin", "70": "Norveç", "710-719": "Rezerve/Özel Kullanım",
    "729": "İsrail", "73": "İsveç", "740": "Guatemala",
    "741": "El Salvador", "742": "Honduras", "743": "Nikaragua",
    "744": "Kosta Rika", "745": "Panama", "746": "Dominik Cumhuriyeti",
    "750": "Meksika", "754-755": "Kanada", "759": "Venezuela",
    "76": "İsviçre", "770-771": "Kolombiya", "773": "Uruguay",
    "775": "Peru", "777": "Bolivya", "779": "Arjantin",
    "780": "Şili", "784": "Paraguay", "786": "Ekvador",
    "789-790": "Brezilya", "80-83": "İtalya", "84": "İspanya",
    "850": "Küba", "858": "Slovakya", "859": "Çek Cumhuriyeti",
    "860": "Sırbistan", "865": "Moğolistan", "867": "Kuzey Kore",
    "868-869": "Türkiye", "87": "Hollanda", "880": "Güney Kore",
    "884": "Kamboçya", "885": "Tayland", "888": "Singapur",
    "890": "Hindistan", "893": "Vietnam", "896": "Pakistan",
    "899": "Endonezya", "90-91": "Avusturya", "93": "Avustralya",
    "94": "Yeni Zelanda", "955": "Malezya", "958": "Makao",
    "977": "Süreli Yayınlar (ISSN)", "978-979": "Kitaplar (ISBN)",
    "980": "Para İade Kuponları", "981-984": "Kuponlar", "99": "Kuponlar",
}
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# Yardımcı fonksiyonlar
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def _extract_price(p):
|
||
"""Extract selling price from product, handling both old and Search API formats."""
|
||
pr = p.get("price", {})
|
||
if isinstance(pr, (int, float)):
|
||
return pr
|
||
return (pr.get("sellingPrice") or pr.get("discountedPrice")
|
||
or pr.get("current") or pr.get("originalPrice")
|
||
or pr.get("old") or 0)
|
||
|
||
|
||
def _extract_rating(p):
|
||
"""Extract average rating from product."""
|
||
rating = p.get("ratingScore") or p.get("rating", 0)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
try:
|
||
return float(rating) if rating else 0.0
|
||
except (ValueError, TypeError):
|
||
return 0.0
|
||
|
||
|
||
def _extract_review_count(p):
|
||
"""Extract review/comment count from product."""
|
||
review_count = 0
|
||
try:
|
||
review_count = int(p.get("rating_count", 0) or 0)
|
||
except (ValueError, TypeError, AttributeError):
|
||
pass
|
||
if not review_count:
|
||
try:
|
||
rating_obj = p.get("ratingScore") or p.get("rating", {})
|
||
if isinstance(rating_obj, dict):
|
||
review_count = int(
|
||
rating_obj.get("totalCount", 0)
|
||
or rating_obj.get("totalComments", 0)
|
||
or 0
|
||
)
|
||
except (ValueError, TypeError, AttributeError):
|
||
review_count = 0
|
||
return review_count
|
||
|
||
|
||
def _parse_social_value(value_str):
|
||
"""Parse social proof value like '642', '1.2k', '10B+' etc."""
|
||
try:
|
||
s = str(value_str).strip()
|
||
if "k" in s.lower():
|
||
return int(float(s.lower().replace("k", "").replace("+", "")) * 1000)
|
||
if "b+" in s.lower():
|
||
return int(float(s.lower().replace("b+", "")) * 1_000_000_000)
|
||
if "m+" in s.lower():
|
||
return int(float(s.lower().replace("m+", "")) * 1_000_000)
|
||
return int(s.replace("+", ""))
|
||
except (ValueError, TypeError):
|
||
return 0
|
||
|
||
|
||
def _detect_barcode_country(prefix_num):
    """Map a barcode prefix to its label from BARCODE_COUNTRIES.

    Entries are tried in dict order and the first hit is returned. Exact keys
    match when ``prefix_num`` starts with them; range keys ("low-high") match
    when the leading ``len(low)`` characters of ``prefix_num`` are digits whose
    integer value lies within the inclusive range. Returns "Bilinmiyor" when
    nothing matches.
    """
    for pattern, label in BARCODE_COUNTRIES.items():
        if "-" not in pattern:
            if prefix_num.startswith(pattern):
                return label
            continue

        low, high = pattern.split("-")
        # Slicing already copes with prefixes shorter than the range width.
        head = prefix_num[:len(low)]
        if not head.isdigit():
            continue
        try:
            if int(low) <= int(head) <= int(high):
                return label
        except ValueError:
            # Malformed range bounds: ignore this entry.
            continue
    return "Bilinmiyor"
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 1. normalize_product
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def normalize_product(raw_product, category_name, social_details):
    """
    Flatten a raw product dict into the flat record used by the dashboard.

    Social-proof metrics are resolved per metric, not per source:

    * orders: inline ``socialProofs`` first, enrichment data as fallback
      (inline gives an exact count such as 294 while the enrichment API
      rounds, e.g. 500+);
    * page views, baskets, favorites: enrichment data first, inline as
      fallback (enrichment is finer-grained, e.g. 17.8B=17800, while inline
      values are rounded, e.g. 3k=3000).

    Null or malformed optional fields (``merchantListings``, ``customValues``,
    ``socialProofs`` entries, ``name``, ``barcode``) are treated as absent
    instead of raising.

    Args:
        raw_product: raw product dict as scraped.
        category_name: category label stored under both ``category`` and
            ``category_name``.
        social_details: mapping of product id (str or int key) to a dict with
            ``orders`` / ``page_views`` / ``baskets`` / ``favorites``; may be
            empty or None.

    Returns:
        dict with keys id, name, brand, category, category_name, price,
        rating, review_count, orders, page_views, baskets, favorites, barcode,
        country_code, country, image_url, url, in_stock, seller_count,
        has_buybox, is_trendyol_product.
    """
    product_id = raw_product.get("contentId") or raw_product.get("id")
    price = _extract_price(raw_product)
    rating = _extract_rating(raw_product)
    review_count = _extract_review_count(raw_product)

    brand = raw_product.get("brand", {})
    brand_name = (brand.get("name") if isinstance(brand, dict) else brand) or "Bilinmeyen"

    # ── Social proof, source 1: inline socialProofs on the product itself ──
    inline = {"orderCountL3D": 0, "pageViewCount": 0, "basketCount": 0, "favoriteCount": 0}
    social_proofs = raw_product.get("socialProofs", [])
    if isinstance(social_proofs, list):
        for proof in social_proofs:
            if not isinstance(proof, dict):
                continue  # skip malformed entries rather than abort the product
            proof_type = proof.get("type", "")
            if proof_type in inline:
                inline[proof_type] = _parse_social_value(proof.get("value", "0"))

    # ── Social proof, source 2: enrichment data keyed by product id ──
    sp = {}
    if product_id and social_details:
        # Keys may be strings (after a JSON round-trip) or ints.
        sp = (social_details.get(str(product_id))
              or social_details.get(int(product_id) if str(product_id).isdigit() else -1)
              or {})
    if not isinstance(sp, dict):
        sp = {}
    enrich_orders = sp.get("orders", 0) or 0
    enrich_views = sp.get("page_views", 0) or 0
    enrich_baskets = sp.get("baskets", 0) or 0
    enrich_favorites = sp.get("favorites", 0) or 0

    # ── Pick the preferred source per metric (see docstring) ──
    orders = inline["orderCountL3D"] or enrich_orders
    page_views = enrich_views or inline["pageViewCount"]
    baskets = enrich_baskets or inline["basketCount"]
    favorites = enrich_favorites or inline["favoriteCount"]

    # ── Image URL ──
    image_url = raw_product.get("imageUrl", "")
    if not image_url:
        images = raw_product.get("images", [])
        image_url = images[0] if isinstance(images, list) and images else ""

    # ── Product URL: synthesize from the id, or make a relative URL absolute ──
    product_url = raw_product.get("url", "")
    if not product_url and product_id:
        product_url = f"https://www.trendyol.com/p/{product_id}"
    elif product_url and not product_url.startswith("http"):
        product_url = f"https://www.trendyol.com{product_url}" if product_url.startswith("/") else f"https://www.trendyol.com/{product_url}"

    # ── Barcode (always a string so downstream slicing/prefix checks are safe) ──
    barcode = ""
    winner_variant = raw_product.get("winnerVariant", {})
    if isinstance(winner_variant, dict):
        raw_barcode = winner_variant.get("barcode")
        barcode = str(raw_barcode) if raw_barcode else ""

    # ── Merchant listings: a null or non-list value counts as "no listings" ──
    merchant_listings = raw_product.get("merchantListings") or []
    if not isinstance(merchant_listings, list):
        merchant_listings = []
    primary_listing = merchant_listings[0] if merchant_listings else {}
    if not isinstance(primary_listing, dict):
        primary_listing = {}

    # ── Country of origin, read from the first listing's custom values ──
    country_code = ""
    country_name = "Bilinmeyen"
    for cv in primary_listing.get("customValues") or []:
        if isinstance(cv, dict) and cv.get("key") == "origin":
            country_code = str(cv.get("value") or "").upper()
            country_name = COUNTRY_NAMES.get(
                country_code, f"Diğer ({country_code})" if country_code else "Bilinmeyen"
            )
            break

    # ── Seller info ──
    seller_count = len(merchant_listings)
    has_buybox = primary_listing.get("isWinner", False)
    is_trendyol_product = barcode.startswith(("TYB", "SGT", "KPE", "RTN", "CDM"))

    return {
        "id": product_id,
        "name": raw_product.get("name") or "",
        "brand": brand_name,
        "category": category_name,
        "category_name": category_name,  # frontend compatibility (ProductFinderTab, OpportunityTab)
        "price": round(price, 2) if price else 0,
        "rating": round(rating, 2),
        "review_count": review_count,
        "orders": orders,
        "page_views": page_views,
        "baskets": baskets,
        "favorites": favorites,
        "barcode": barcode,
        "country_code": country_code,
        "country": country_name,
        "image_url": image_url or "https://via.placeholder.com/150",
        "url": product_url,
        "in_stock": raw_product.get("inStock", False),
        "seller_count": seller_count,
        "has_buybox": has_buybox,
        "is_trendyol_product": is_trendyol_product,
    }
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 2. calculate_kpis
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def calculate_kpis(products):
    """Compute the headline KPI figures for a list of normalized products.

    Price statistics only consider products with a positive price and rating
    statistics only those with a positive rating. The placeholder brand
    "Bilinmeyen" is excluded from the brand count.

    Returns:
        dict with total_products, total_subcategories, total_brands,
        avg_price, median_price, avg_rating, low_rating_count,
        low_rating_rate, min_price and max_price (floats rounded to 2 places).
    """
    priced = [item["price"] for item in products if item["price"] > 0]
    rated = [item["rating"] for item in products if item["rating"] > 0]

    if priced:
        mean_price = sum(priced) / len(priced)
        middle_price = float(np.percentile(priced, 50))
        cheapest = min(priced)
        priciest = max(priced)
    else:
        mean_price = middle_price = cheapest = priciest = 0

    # "Low" means strictly below 3.0 among rated products.
    poorly_rated = len([score for score in rated if score < 3.0])
    if rated:
        mean_rating = sum(rated) / len(rated)
        poor_share = poorly_rated / len(rated) * 100
    else:
        mean_rating = poor_share = 0

    brand_names = {
        item["brand"]
        for item in products
        if item["brand"] and item["brand"] != "Bilinmeyen"
    }
    subcategory_names = {item["category"] for item in products if item["category"]}

    return {
        "total_products": len(products),
        "total_subcategories": len(subcategory_names),
        "total_brands": len(brand_names),
        "avg_price": round(mean_price, 2),
        "median_price": round(middle_price, 2),
        "avg_rating": round(mean_rating, 2),
        "low_rating_count": poorly_rated,
        "low_rating_rate": round(poor_share, 2),
        "min_price": round(cheapest, 2),
        "max_price": round(priciest, 2),
    }
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 3. calculate_charts
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def calculate_charts(products):
    """Compute every chart dataset of the dashboard from normalized products.

    Args:
        products: list of flat product dicts; reads ``price``, ``rating``,
            ``brand``, ``category``, ``orders``, ``in_stock``,
            ``country_code`` and, when present, ``barcode``.

    Returns:
        dict with the keys price_distribution, top_categories, top_brands,
        rating_distribution, brand_price_boxplot, price_rating_scatter,
        brand_strength, brand_category_heatmap, category_price_premium,
        origin_analysis and barcode_analysis. Merchant data is not produced
        here because it needs the raw (non-normalized) products.

    Note:
        ``price_rating_scatter`` is built from a random sample of at most 500
        products, so its content and order vary between calls.
    """
    total_products = len(products)
    prices = [p["price"] for p in products if p["price"] > 0]

    # ── Price distribution (upper bound of each bucket is exclusive) ──
    price_ranges = {"0-100": 0, "100-250": 0, "250-500": 0, "500-1000": 0, "1000+": 0}
    for price in prices:
        if price < 100:
            price_ranges["0-100"] += 1
        elif price < 250:
            price_ranges["100-250"] += 1
        elif price < 500:
            price_ranges["250-500"] += 1
        elif price < 1000:
            price_ranges["500-1000"] += 1
        else:
            price_ranges["1000+"] += 1

    # ── Group once by category and by brand; later sections reuse both ──
    categories_data = defaultdict(list)
    brand_products = defaultdict(list)
    for p in products:
        categories_data[p["category"]].append(p)
        brand_products[p["brand"]].append(p)

    # ── Top categories (ordered by total orders) ──
    top_categories = sorted(
        (
            {
                "name": cat_name,
                "count": len(cat_products),
                "total_orders": sum(p["orders"] for p in cat_products),
            }
            for cat_name, cat_products in categories_data.items()
        ),
        key=lambda x: x["total_orders"], reverse=True,
    )[:20]

    # ── Top brands (ordered by product count) ──
    top_brands = sorted(
        ({"name": brand, "count": len(items)} for brand, items in brand_products.items()),
        key=lambda x: x["count"], reverse=True,
    )[:20]
    top_10_brands = [b["name"] for b in top_brands[:10]]
    top_10_cats = [c["name"] for c in top_categories[:10]]

    # ── Rating distribution (unrated products, rating 0, land in "0-1") ──
    rating_distribution = {"0-1": 0, "1-2": 0, "2-3": 0, "3-4": 0, "4-5": 0}
    for p in products:
        r = p["rating"]
        if r < 1:
            rating_distribution["0-1"] += 1
        elif r < 2:
            rating_distribution["1-2"] += 1
        elif r < 3:
            rating_distribution["2-3"] += 1
        elif r < 4:
            rating_distribution["3-4"] += 1
        else:
            rating_distribution["4-5"] += 1

    # ── Brand price boxplot (top 10 brands with at least 4 priced products) ──
    brand_price_stats = []
    for brand_name in top_10_brands:
        bp = [p["price"] for p in brand_products[brand_name] if p["price"] > 0]
        if len(bp) >= 4:
            pcts = np.percentile(bp, [0, 25, 50, 75, 100])
            brand_price_stats.append({
                "brand": brand_name,
                "min": round(float(pcts[0]), 2),
                "q1": round(float(pcts[1]), 2),
                "median": round(float(pcts[2]), 2),
                "q3": round(float(pcts[3]), 2),
                "max": round(float(pcts[4]), 2),
                "count": len(bp),
            })

    # ── Scatter plot (price vs rating) over a random sample of up to 500 ──
    sample_size = min(500, total_products)
    sampled = random.sample(products, sample_size) if products else []
    scatter_data = [
        {
            "price": p["price"],
            "rating": p["rating"],
            "brand": p["brand"],
            "in_stock": p["in_stock"],
        }
        for p in sampled
        if p["price"] > 0 and p["rating"] > 0
    ]

    # ── Brand strength: share + 5 * average rating - stock-out rate ──
    brand_strength_scores = []
    for brand_name in top_10_brands:
        items = brand_products[brand_name]
        brand_count = len(items)
        brand_share = (brand_count / total_products * 100) if total_products > 0 else 0
        brand_ratings = [p["rating"] for p in items if p["rating"] > 0]
        brand_avg_rating = sum(brand_ratings) / len(brand_ratings) if brand_ratings else 0
        brand_out_of_stock = sum(1 for p in items if not p["in_stock"])
        stockout_rate = (brand_out_of_stock / brand_count * 100) if brand_count > 0 else 0
        strength = brand_share + (brand_avg_rating * 5) - stockout_rate
        brand_strength_scores.append({
            "brand": brand_name,
            "share": round(brand_share, 2),
            "avg_rating": round(brand_avg_rating, 2),
            "stockout_rate": round(stockout_rate, 2),
            "strength_score": round(strength, 2),
        })
    brand_strength_scores.sort(key=lambda x: x["strength_score"], reverse=True)

    # ── Heatmap: brand × category (top 10 of each, zero cells omitted) ──
    heatmap_data = []
    for cat_name in top_10_cats:
        per_brand = defaultdict(int)
        for p in categories_data[cat_name]:
            per_brand[p["brand"]] += 1
        for brand_name in top_10_brands:
            count = per_brand.get(brand_name, 0)
            if count > 0:
                heatmap_data.append({"brand": brand_name, "category": cat_name, "value": count})

    # ── Category price premium relative to the overall average price ──
    avg_price = sum(prices) / len(prices) if prices else 0
    category_price_analysis = []
    for cat_name, cat_products in categories_data.items():
        cp = [p["price"] for p in cat_products if p["price"] > 0]
        if cp:
            cat_avg = sum(cp) / len(cp)
            cat_median = float(np.percentile(cp, 50))
            premium = ((cat_avg - avg_price) / avg_price * 100) if avg_price > 0 else 0
            category_price_analysis.append({
                "category": cat_name,
                "avg_price": round(cat_avg, 2),
                "median_price": round(cat_median, 2),
                "price_premium": round(premium, 2),
                "product_count": len(cp),
                "min_price": round(min(cp), 2),
                "max_price": round(max(cp), 2),
            })
    category_price_analysis.sort(key=lambda x: x["price_premium"], reverse=True)
    most_expensive = [c for c in category_price_analysis if c["price_premium"] > 0][:10]
    # The list is sorted descending, so the cheapest categories sit at the
    # tail; take the last ten and flip them to put the cheapest first.
    most_affordable = [c for c in category_price_analysis if c["price_premium"] < 0][-10:]
    most_affordable.reverse()

    # ── Origin analysis (products carrying a country code) ──
    origin_counts = defaultdict(int)
    for p in products:
        if p["country_code"]:
            origin_counts[p["country_code"]] += 1
    products_with_origin = sum(origin_counts.values())

    origin_country_data = sorted(
        [
            {
                "country_code": code,
                "country_name": COUNTRY_NAMES.get(code, f"Diğer ({code})"),
                "product_count": count,
                "percentage": round(count / products_with_origin * 100, 2) if products_with_origin else 0,
            }
            for code, count in origin_counts.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )

    # ── Barcode analysis (first three characters of each barcode) ──
    barcode_prefixes = defaultdict(int)
    for p in products:
        bc = p.get("barcode", "")
        if bc and len(bc) >= 3:
            barcode_prefixes[bc[:3]] += 1
    products_with_barcode = sum(barcode_prefixes.values())

    # Resolve each distinct prefix once and reuse it for both tables.
    prefix_country = {prefix: _detect_barcode_country(prefix) for prefix in barcode_prefixes}
    barcode_countries_detected = defaultdict(int)
    for prefix, count in barcode_prefixes.items():
        barcode_countries_detected[prefix_country[prefix]] += count

    barcode_prefix_data = sorted(
        [
            {
                "prefix": prefix,
                "detected_country": prefix_country[prefix],
                "product_count": count,
                "percentage": round(count / products_with_barcode * 100, 2) if products_with_barcode else 0,
            }
            for prefix, count in barcode_prefixes.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )[:20]

    barcode_country_data = sorted(
        [
            {
                "country_name": country,
                "product_count": count,
                "percentage": round(count / products_with_barcode * 100, 2) if products_with_barcode else 0,
            }
            for country, count in barcode_countries_detected.items()
        ],
        key=lambda x: x["product_count"], reverse=True,
    )

    # ── Build result ──
    return {
        "price_distribution": price_ranges,
        "top_categories": top_categories,
        "top_brands": top_brands,
        "rating_distribution": rating_distribution,
        "brand_price_boxplot": brand_price_stats,
        "price_rating_scatter": scatter_data,
        "brand_strength": brand_strength_scores,
        "brand_category_heatmap": heatmap_data,
        "category_price_premium": {
            "all_categories": category_price_analysis,
            "most_expensive": most_expensive,
            "most_affordable": most_affordable,
        },
        "origin_analysis": {
            "countries": origin_country_data,
            "top_countries": origin_country_data[:10],
            "total_products_with_origin": products_with_origin,
            "coverage_percentage": round(products_with_origin / total_products * 100, 2) if total_products else 0,
        },
        "barcode_analysis": {
            "prefixes": barcode_prefix_data,
            "countries_from_barcode": barcode_country_data,
            "top_countries_from_barcode": barcode_country_data[:10],
            "total_products_with_barcode": products_with_barcode,
            "coverage_percentage": round(products_with_barcode / total_products * 100, 2) if total_products else 0,
        },
    }
|
||
|
||
|
||
def _calculate_merchant_analysis(raw_products, categories_data):
|
||
"""
|
||
Satıcı analizini ham ürün verisinden hesapla (merchantListings alanı gerekli).
|
||
raw_products: ham Trendyol ürün dict listesi, categories_data: {cat_name: [products]}
|
||
"""
|
||
merchants_data = {}
|
||
total_winners = 0
|
||
products_with_merchant = 0
|
||
|
||
for product in raw_products:
|
||
merchant_listings = product.get("merchantListings", [])
|
||
if not merchant_listings:
|
||
continue
|
||
ml = merchant_listings[0]
|
||
merchant = ml.get("merchant", {})
|
||
merchant_id = merchant.get("id")
|
||
if not merchant_id:
|
||
continue
|
||
|
||
products_with_merchant += 1
|
||
if merchant_id not in merchants_data:
|
||
merchant_name = merchant.get("name") or merchant.get("officialName") or f"Satıcı {merchant_id}"
|
||
merchants_data[merchant_id] = {
|
||
"merchant_id": merchant_id,
|
||
"merchant_name": merchant_name,
|
||
"product_count": 0,
|
||
"total_price": 0,
|
||
"winner_count": 0,
|
||
}
|
||
|
||
merchants_data[merchant_id]["product_count"] += 1
|
||
price = _extract_price(product)
|
||
if price > 0:
|
||
merchants_data[merchant_id]["total_price"] += price
|
||
if ml.get("isWinner"):
|
||
merchants_data[merchant_id]["winner_count"] += 1
|
||
total_winners += 1
|
||
|
||
merchant_list = []
|
||
for mid, data in merchants_data.items():
|
||
avg_price = data["total_price"] / data["product_count"] if data["product_count"] > 0 else 0
|
||
winner_ratio = (data["winner_count"] / data["product_count"] * 100) if data["product_count"] > 0 else 0
|
||
merchant_url = None
|
||
if data["merchant_name"] and not data["merchant_name"].startswith("Satıcı "):
|
||
merchant_url = f"https://www.trendyol.com/magaza/{data['merchant_name'].lower().replace(' ', '-')}-m-{mid}"
|
||
merchant_list.append({
|
||
"merchant_id": mid,
|
||
"merchant_name": data["merchant_name"],
|
||
"merchant_url": merchant_url,
|
||
"product_count": data["product_count"],
|
||
"avg_price": round(avg_price, 2),
|
||
"winner_count": data["winner_count"],
|
||
"winner_ratio": round(winner_ratio, 2),
|
||
})
|
||
|
||
merchant_list.sort(key=lambda x: x["product_count"], reverse=True)
|
||
total_products = len(raw_products)
|
||
total_merchants = len(merchants_data)
|
||
winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0
|
||
|
||
return {
|
||
"merchants": merchant_list,
|
||
"top_merchants": merchant_list[:20],
|
||
"total_merchants": total_merchants,
|
||
"total_products_with_merchant": products_with_merchant,
|
||
"total_winners": total_winners,
|
||
"winner_percentage": round(winner_percentage, 2),
|
||
"coverage_percentage": round(products_with_merchant / total_products * 100, 2) if total_products else 0,
|
||
}
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 4. calculate_insights
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def calculate_insights(products):
    """Return low-rated products and price outliers for the insights panel.

    * ``low_rating_products``: up to 20 products whose rating is strictly
      between 0 and 3.0, lowest rating first.
    * ``anomalies``: up to 20 products priced outside the 1.5 × IQR fences,
      highest price first; only computed when more than 4 positive prices
      exist.

    Product names are truncated to 50 characters in both lists.
    """
    poorly_rated = sorted(
        (
            {
                "name": item["name"][:50],
                "brand": item["brand"],
                "rating": item["rating"],
                "price": item["price"],
                "in_stock": item["in_stock"],
            }
            for item in products
            if 0 < item["rating"] < 3.0
        ),
        key=lambda row: row["rating"],
    )[:20]

    positive_prices = [item["price"] for item in products if item["price"] > 0]
    outliers = []
    if len(positive_prices) > 4:
        first_q, third_q = np.percentile(positive_prices, [25, 75])
        spread = third_q - first_q
        floor = first_q - 1.5 * spread
        ceiling = third_q + 1.5 * spread
        for item in products:
            cost = item["price"]
            if not cost > 0:
                continue
            if not (cost < floor or cost > ceiling):
                continue
            outliers.append({
                "name": item["name"][:50],
                "brand": item["brand"],
                "price": cost,
                "type": "expensive" if cost > ceiling else "cheap",
            })
        outliers.sort(key=lambda row: row["price"], reverse=True)
        outliers = outliers[:20]

    return {"low_rating_products": poorly_rated, "anomalies": outliers}
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 5. build_consolidated_report (ana orkestratör)
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def build_consolidated_report(report_id, db, reports_dir, social_data=None):
    """
    Load a report's raw data, normalize it, run all calculations and return
    the consolidated dashboard payload.

    The payload has the same shape whether or not any product survived
    normalization; when none did, ``metadata["empty"]`` is set to True and
    the KPI/chart/insight sections hold their empty values.

    Args:
        report_id: DB id of the report.
        db: SQLAlchemy session.
        reports_dir: path of the reports/ directory.
        social_data: enrichment social-proof data (a dict with a ``details``
            mapping); optional — when falsy it is read from
            ``<reports_dir>/enrich_<report_id>/social.json`` if that exists.

    Returns:
        The consolidated dashboard dict, or None when the report row does not
        exist or its JSON file path is missing.
    """
    from database import Report
    t0 = time.time()

    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        return None
    if not report.json_file_path or not os.path.exists(report.json_file_path):
        return None

    # Read the report's metadata file
    with open(report.json_file_path, "r", encoding="utf-8") as f:
        report_data = json.load(f)

    # Load social-proof data (explicit argument wins over the file on disk)
    social_details = {}
    if social_data:
        social_details = social_data.get("details") or {}
    else:
        social_file = os.path.join(reports_dir, f"enrich_{report_id}", "social.json")
        if os.path.exists(social_file):
            try:
                with open(social_file, "r", encoding="utf-8") as f:
                    soc = json.load(f)
                social_details = soc.get("details") or {}
            except Exception as e:
                # Enrichment is optional: continue without it.
                log.warning(f"Social proof dosyası okunamadı: {e}")

    # ── Load raw products and normalize them ──
    normalized_products = []
    raw_products_all = []  # raw records are kept for the merchant analysis

    for detail in report_data.get("details") or []:
        if not detail.get("success") or not detail.get("file_path"):
            continue
        file_path = detail["file_path"]
        if not os.path.exists(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                cat_data = json.load(f)
            raw_products = cat_data.get("products") or []
            cat_name_raw = detail.get("category_name", "")
            # Drop a trailing " <number>" suffix from the category label.
            cat_name = re.sub(r'\s+\d+$', '', cat_name_raw)

            for raw in raw_products:
                # Set category on raw product for load_report_products compatibility
                if isinstance(raw.get("category"), dict):
                    raw["category"]["name"] = cat_name
                else:
                    raw["category"] = {"id": 0, "name": cat_name}

                norm = normalize_product(raw, cat_name, social_details)
                # Products without a price or category are left out.
                if norm["price"] and norm["category"]:
                    normalized_products.append(norm)

            raw_products_all.extend(raw_products)
        except (json.JSONDecodeError, OSError, KeyError) as e:
            log.warning(f"Kategori dosyası okunamadı: {file_path}: {e}")
            continue

    # ── Calculations (each handles an empty product list) ──
    kpis = calculate_kpis(normalized_products)
    charts = calculate_charts(normalized_products)
    insights = calculate_insights(normalized_products)

    # Merchant analysis needs the raw data; it is attached in the empty case
    # too so that ``charts`` always exposes the same keys.
    charts["merchant_analysis"] = _calculate_merchant_analysis(raw_products_all, {})

    metadata = {
        "report_id": report_id,
        "report_name": report.name,
        "created_at": report.created_at.isoformat() if report.created_at else None,
        "total_products": len(normalized_products),
        "total_categories": kpis["total_subcategories"],
        "consolidated_at": datetime.now().isoformat(),
    }

    if not normalized_products:
        log.warning(f"Rapor {report_id} için ürün bulunamadı — boş dashboard döndürülüyor")
        metadata["empty"] = True
    else:
        elapsed = time.time() - t0
        log.info(f"Rapor {report_id} konsolide edildi: {len(normalized_products)} ürün, {elapsed:.2f}s")

    return {
        "metadata": metadata,
        "report_id": report_id,
        "report_name": report.name,
        "products": normalized_products,
        "all_products": normalized_products,  # backward compatibility (frontend expects "all_products")
        "kpis": kpis,
        "charts": charts,
        "insights": insights,
    }
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────
|
||
# 6. save / load
|
||
# ─────────────────────────────────────────────────────────
|
||
|
||
def save_consolidated_report(report_id, data, reports_dir):
    """Write the consolidated data to ``<reports_dir>/report_<id>_data.json``.

    The JSON is first written to a temporary file in the same directory and
    then moved into place with ``os.replace``, so a failed or interrupted
    write never leaves a truncated file over a previously saved report.

    Args:
        report_id: id used in the file name.
        data: JSON-serializable payload.
        reports_dir: target directory (created if missing); may be empty to
            write into the current directory.

    Returns:
        The path of the written file.

    Raises:
        OSError: if the directory or file cannot be written.
        TypeError / ValueError: if ``data`` is not JSON-serializable.
    """
    path = os.path.join(reports_dir, f"report_{report_id}_data.json")
    parent = os.path.dirname(path)
    if parent:  # os.makedirs("") raises, so skip it for a bare file name
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    log.info(f"Konsolide rapor kaydedildi: {path}")
    return path
|
||
|
||
|
||
def load_consolidated_report(report_id, reports_dir):
    """Return the parsed consolidated report, or None if it is unavailable.

    None is returned both when ``<reports_dir>/report_<id>_data.json`` does
    not exist and when it cannot be read or parsed (the latter is logged as a
    warning).
    """
    path = os.path.join(reports_dir, f"report_{report_id}_data.json")
    if not os.path.exists(path):
        return None

    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except (json.JSONDecodeError, OSError) as e:
        log.warning(f"Konsolide dosya okunamadı: {path}: {e}")
        return None
    return payload
|