mirror of
https://github.com/nethunterzist/trendyol-analiz
synced 2026-07-01 01:17:04 +00:00
- Add POST /categories/bulk-import endpoint for hierarchical category import - Add parent_id field to CategoryUpdate model - Support parent-child relationships in category updates Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
4932 lines
198 KiB
Python
4932 lines
198 KiB
Python
"""
|
||
FastAPI Backend for Trendyol Admin Panel
|
||
"""
|
||
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from fastapi.responses import StreamingResponse
|
||
import asyncio
|
||
import json as json_module
|
||
from sqlalchemy.orm import Session
|
||
from sqlalchemy import func
|
||
from typing import List, Optional
|
||
from pydantic import BaseModel
|
||
from datetime import datetime
|
||
import uuid
|
||
from functools import lru_cache
|
||
import numpy as np
|
||
import random
|
||
import requests
|
||
import time
|
||
import re
|
||
from collections import OrderedDict
|
||
from threading import Lock
|
||
import os
|
||
|
||
from database import SessionLocal, Category, Snapshot, Report, EnrichmentError, init_db
|
||
from google_trends_helper import estimate_traffic_sources, fetch_google_trends
|
||
|
||
# Initialize database on startup
|
||
init_db()
|
||
|
||
# GS1 Barcode Prefix to Country Mapping (EAN-13 / EAN-8)
|
||
# Source: https://www.gs1.org/standards/id-keys/company-prefix
|
||
BARCODE_PREFIX_TO_COUNTRY = {
|
||
# Turkey
|
||
"869": "Türkiye",
|
||
|
||
# Europe
|
||
"300": "Fransa", "301": "Fransa", "302": "Fransa", "303": "Fransa", "304": "Fransa",
|
||
"305": "Fransa", "306": "Fransa", "307": "Fransa", "308": "Fransa", "309": "Fransa",
|
||
"310": "Fransa", "311": "Fransa", "312": "Fransa", "313": "Fransa", "314": "Fransa",
|
||
"315": "Fransa", "316": "Fransa", "317": "Fransa", "318": "Fransa", "319": "Fransa",
|
||
"320": "Fransa", "321": "Fransa", "322": "Fransa", "323": "Fransa", "324": "Fransa",
|
||
"325": "Fransa", "326": "Fransa", "327": "Fransa", "328": "Fransa", "329": "Fransa",
|
||
"330": "Fransa", "331": "Fransa", "332": "Fransa", "333": "Fransa", "334": "Fransa",
|
||
"335": "Fransa", "336": "Fransa", "337": "Fransa", "338": "Fransa", "339": "Fransa",
|
||
"340": "Fransa", "341": "Fransa", "342": "Fransa", "343": "Fransa", "344": "Fransa",
|
||
"345": "Fransa", "346": "Fransa", "347": "Fransa", "348": "Fransa", "349": "Fransa",
|
||
"350": "Fransa", "351": "Fransa", "352": "Fransa", "353": "Fransa", "354": "Fransa",
|
||
"355": "Fransa", "356": "Fransa", "357": "Fransa", "358": "Fransa", "359": "Fransa",
|
||
"360": "Fransa", "361": "Fransa", "362": "Fransa", "363": "Fransa", "364": "Fransa",
|
||
"365": "Fransa", "366": "Fransa", "367": "Fransa", "368": "Fransa", "369": "Fransa",
|
||
"370": "Fransa", "371": "Fransa", "372": "Fransa", "373": "Fransa", "374": "Fransa",
|
||
"375": "Fransa", "376": "Fransa", "377": "Fransa", "378": "Fransa", "379": "Fransa",
|
||
"380": "Bulgaristan",
|
||
"383": "Slovenya",
|
||
"385": "Hırvatistan",
|
||
"387": "Bosna Hersek",
|
||
"389": "Karadağ",
|
||
"400": "Almanya", "401": "Almanya", "402": "Almanya", "403": "Almanya", "404": "Almanya",
|
||
"405": "Almanya", "406": "Almanya", "407": "Almanya", "408": "Almanya", "409": "Almanya",
|
||
"410": "Almanya", "411": "Almanya", "412": "Almanya", "413": "Almanya", "414": "Almanya",
|
||
"415": "Almanya", "416": "Almanya", "417": "Almanya", "418": "Almanya", "419": "Almanya",
|
||
"420": "Almanya", "421": "Almanya", "422": "Almanya", "423": "Almanya", "424": "Almanya",
|
||
"425": "Almanya", "426": "Almanya", "427": "Almanya", "428": "Almanya", "429": "Almanya",
|
||
"430": "Almanya", "431": "Almanya", "432": "Almanya", "433": "Almanya", "434": "Almanya",
|
||
"435": "Almanya", "436": "Almanya", "437": "Almanya", "438": "Almanya", "439": "Almanya",
|
||
"440": "Almanya",
|
||
"450": "Japonya", "451": "Japonya", "452": "Japonya", "453": "Japonya", "454": "Japonya",
|
||
"455": "Japonya", "456": "Japonya", "457": "Japonya", "458": "Japonya", "459": "Japonya",
|
||
"460": "Rusya", "461": "Rusya", "462": "Rusya", "463": "Rusya", "464": "Rusya",
|
||
"465": "Rusya", "466": "Rusya", "467": "Rusya", "468": "Rusya", "469": "Rusya",
|
||
"470": "Kırgızistan",
|
||
"471": "Tayvan",
|
||
"474": "Estonya",
|
||
"475": "Letonya",
|
||
"476": "Azerbaycan",
|
||
"477": "Litvanya",
|
||
"478": "Özbekistan",
|
||
"479": "Sri Lanka",
|
||
"480": "Filipinler",
|
||
"481": "Belarus",
|
||
"482": "Ukrayna",
|
||
"483": "Türkmenistan",
|
||
"484": "Moldova",
|
||
"485": "Ermenistan",
|
||
"486": "Gürcistan",
|
||
"487": "Kazakistan",
|
||
"488": "Tacikistan",
|
||
"489": "Hong Kong",
|
||
"490": "Japonya", "491": "Japonya", "492": "Japonya", "493": "Japonya", "494": "Japonya",
|
||
"495": "Japonya", "496": "Japonya", "497": "Japonya", "498": "Japonya", "499": "Japonya",
|
||
|
||
# Asia Pacific
|
||
"500": "İngiltere", "501": "İngiltere", "502": "İngiltere", "503": "İngiltere", "504": "İngiltere",
|
||
"505": "İngiltere", "506": "İngiltere", "507": "İngiltere", "508": "İngiltere", "509": "İngiltere",
|
||
"520": "Yunanistan", "521": "Yunanistan",
|
||
"528": "Lübnan",
|
||
"529": "Kıbrıs",
|
||
"530": "Arnavutluk",
|
||
"531": "Makedonya",
|
||
"535": "Malta",
|
||
"539": "İrlanda",
|
||
"540": "Belçika", "541": "Belçika", "542": "Belçika", "543": "Belçika", "544": "Belçika",
|
||
"545": "Belçika", "546": "Belçika",
|
||
"560": "Portekiz",
|
||
"569": "İzlanda",
|
||
"570": "Danimarka", "571": "Danimarka", "572": "Danimarka", "573": "Danimarka", "574": "Danimarka",
|
||
"575": "Danimarka", "576": "Danimarka", "577": "Danimarka", "578": "Danimarka", "579": "Danimarka",
|
||
"590": "Polonya",
|
||
"594": "Romanya",
|
||
"599": "Macaristan",
|
||
"600": "Güney Afrika", "601": "Güney Afrika",
|
||
"603": "Gana",
|
||
"604": "Senegal",
|
||
"608": "Bahreyn",
|
||
"609": "Moritanya",
|
||
"611": "Fas",
|
||
"613": "Cezayir",
|
||
"615": "Nijerya",
|
||
"616": "Kenya",
|
||
"618": "Fildişi Sahili",
|
||
"619": "Tunus",
|
||
"620": "Tanzanya",
|
||
"621": "Suriye",
|
||
"622": "Mısır",
|
||
"623": "Brunei",
|
||
"624": "Libya",
|
||
"625": "Ürdün",
|
||
"626": "İran",
|
||
"627": "Kuveyt",
|
||
"628": "Suudi Arabistan",
|
||
"629": "Birleşik Arap Emirlikleri",
|
||
"640": "Finlandiya", "641": "Finlandiya", "642": "Finlandiya", "643": "Finlandiya",
|
||
"644": "Finlandiya", "645": "Finlandiya", "646": "Finlandiya", "647": "Finlandiya",
|
||
"648": "Finlandiya", "649": "Finlandiya",
|
||
"690": "Çin", "691": "Çin", "692": "Çin", "693": "Çin", "694": "Çin",
|
||
"695": "Çin", "696": "Çin", "697": "Çin", "698": "Çin", "699": "Çin",
|
||
"700": "Norveç", "701": "Norveç", "702": "Norveç", "703": "Norveç", "704": "Norveç",
|
||
"705": "Norveç", "706": "Norveç", "707": "Norveç", "708": "Norveç", "709": "Norveç",
|
||
"729": "İsrail",
|
||
"730": "İsveç", "731": "İsveç", "732": "İsveç", "733": "İsveç", "734": "İsveç",
|
||
"735": "İsveç", "736": "İsveç", "737": "İsveç", "738": "İsveç", "739": "İsveç",
|
||
"740": "Guatemala",
|
||
"741": "El Salvador",
|
||
"742": "Honduras",
|
||
"743": "Nikaragua",
|
||
"744": "Kosta Rika",
|
||
"745": "Panama",
|
||
"746": "Dominik Cumhuriyeti",
|
||
"750": "Meksika",
|
||
"754": "Kanada", "755": "Kanada",
|
||
"759": "Venezuela",
|
||
"760": "İsviçre", "761": "İsviçre", "762": "İsviçre", "763": "İsviçre", "764": "İsviçre",
|
||
"765": "İsviçre", "766": "İsviçre", "767": "İsviçre", "768": "İsviçre", "769": "İsviçre",
|
||
"770": "Kolombiya", "771": "Kolombiya",
|
||
"773": "Uruguay",
|
||
"775": "Peru",
|
||
"777": "Bolivya",
|
||
"778": "Arjantin", "779": "Arjantin",
|
||
"780": "Şili",
|
||
"784": "Paraguay",
|
||
"786": "Ekvador",
|
||
"789": "Brezilya", "790": "Brezilya",
|
||
"800": "İtalya", "801": "İtalya", "802": "İtalya", "803": "İtalya", "804": "İtalya",
|
||
"805": "İtalya", "806": "İtalya", "807": "İtalya", "808": "İtalya", "809": "İtalya",
|
||
"810": "İtalya", "811": "İtalya", "812": "İtalya", "813": "İtalya", "814": "İtalya",
|
||
"815": "İtalya", "816": "İtalya", "817": "İtalya", "818": "İtalya", "819": "İtalya",
|
||
"820": "İtalya", "821": "İtalya", "822": "İtalya", "823": "İtalya", "824": "İtalya",
|
||
"825": "İtalya", "826": "İtalya", "827": "İtalya", "828": "İtalya", "829": "İtalya",
|
||
"830": "İtalya", "831": "İtalya", "832": "İtalya", "833": "İtalya", "834": "İtalya",
|
||
"835": "İtalya", "836": "İtalya", "837": "İtalya", "838": "İtalya", "839": "İtalya",
|
||
"840": "İspanya", "841": "İspanya", "842": "İspanya", "843": "İspanya", "844": "İspanya",
|
||
"845": "İspanya", "846": "İspanya", "847": "İspanya", "848": "İspanya", "849": "İspanya",
|
||
"850": "Küba",
|
||
"858": "Slovakya",
|
||
"859": "Çekya",
|
||
"860": "Sırbistan",
|
||
"865": "Moğolistan",
|
||
"867": "Kuzey Kore",
|
||
"868": "Türkiye",
|
||
"870": "Hollanda", "871": "Hollanda", "872": "Hollanda", "873": "Hollanda", "874": "Hollanda",
|
||
"875": "Hollanda", "876": "Hollanda", "877": "Hollanda", "878": "Hollanda", "879": "Hollanda",
|
||
"880": "Güney Kore",
|
||
"884": "Kamboçya",
|
||
"885": "Tayland",
|
||
"888": "Singapur",
|
||
"890": "Hindistan", "891": "Hindistan", "892": "Hindistan", "893": "Hindistan",
|
||
"894": "Hindistan", "895": "Hindistan", "896": "Hindistan", "897": "Hindistan",
|
||
"898": "Hindistan", "899": "Hindistan",
|
||
"900": "Avusturya", "901": "Avusturya", "902": "Avusturya", "903": "Avusturya",
|
||
"904": "Avusturya", "905": "Avusturya", "906": "Avusturya", "907": "Avusturya",
|
||
"908": "Avusturya", "909": "Avusturya",
|
||
"930": "Avustralya", "931": "Avustralya", "932": "Avustralya", "933": "Avustralya",
|
||
"934": "Avustralya", "935": "Avustralya", "936": "Avustralya", "937": "Avustralya",
|
||
"938": "Avustralya", "939": "Avustralya",
|
||
"940": "Yeni Zelanda", "941": "Yeni Zelanda", "942": "Yeni Zelanda", "943": "Yeni Zelanda",
|
||
"944": "Yeni Zelanda", "945": "Yeni Zelanda", "946": "Yeni Zelanda", "947": "Yeni Zelanda",
|
||
"948": "Yeni Zelanda", "949": "Yeni Zelanda",
|
||
"955": "Malezya",
|
||
"958": "Makao",
|
||
|
||
# North America
|
||
"000": "ABD/Kanada", "001": "ABD/Kanada", "002": "ABD/Kanada", "003": "ABD/Kanada",
|
||
"004": "ABD/Kanada", "005": "ABD/Kanada", "006": "ABD/Kanada", "007": "ABD/Kanada",
|
||
"008": "ABD/Kanada", "009": "ABD/Kanada", "010": "ABD/Kanada", "011": "ABD/Kanada",
|
||
"012": "ABD/Kanada", "013": "ABD/Kanada", "019": "ABD/Kanada",
|
||
"020": "ABD/Kanada", "021": "ABD/Kanada", "022": "ABD/Kanada", "023": "ABD/Kanada",
|
||
"024": "ABD/Kanada", "025": "ABD/Kanada", "026": "ABD/Kanada", "027": "ABD/Kanada",
|
||
"028": "ABD/Kanada", "029": "ABD/Kanada", "030": "ABD/Kanada", "031": "ABD/Kanada",
|
||
"032": "ABD/Kanada", "033": "ABD/Kanada", "034": "ABD/Kanada", "035": "ABD/Kanada",
|
||
"036": "ABD/Kanada", "037": "ABD/Kanada", "038": "ABD/Kanada", "039": "ABD/Kanada",
|
||
"040": "ABD/Kanada", "041": "ABD/Kanada", "042": "ABD/Kanada", "043": "ABD/Kanada",
|
||
"044": "ABD/Kanada", "045": "ABD/Kanada", "046": "ABD/Kanada", "047": "ABD/Kanada",
|
||
"048": "ABD/Kanada", "049": "ABD/Kanada", "050": "ABD/Kanada", "051": "ABD/Kanada",
|
||
"052": "ABD/Kanada", "053": "ABD/Kanada", "054": "ABD/Kanada", "055": "ABD/Kanada",
|
||
"056": "ABD/Kanada", "057": "ABD/Kanada", "058": "ABD/Kanada", "059": "ABD/Kanada",
|
||
"060": "ABD/Kanada", "061": "ABD/Kanada", "062": "ABD/Kanada", "063": "ABD/Kanada",
|
||
"064": "ABD/Kanada", "065": "ABD/Kanada", "066": "ABD/Kanada", "067": "ABD/Kanada",
|
||
"068": "ABD/Kanada", "069": "ABD/Kanada", "070": "ABD/Kanada", "071": "ABD/Kanada",
|
||
"072": "ABD/Kanada", "073": "ABD/Kanada", "074": "ABD/Kanada", "075": "ABD/Kanada",
|
||
"076": "ABD/Kanada", "077": "ABD/Kanada", "078": "ABD/Kanada", "079": "ABD/Kanada",
|
||
"080": "ABD/Kanada", "081": "ABD/Kanada", "082": "ABD/Kanada", "083": "ABD/Kanada",
|
||
"084": "ABD/Kanada", "085": "ABD/Kanada", "086": "ABD/Kanada", "087": "ABD/Kanada",
|
||
"088": "ABD/Kanada", "089": "ABD/Kanada", "090": "ABD/Kanada", "091": "ABD/Kanada",
|
||
"092": "ABD/Kanada", "093": "ABD/Kanada", "094": "ABD/Kanada", "095": "ABD/Kanada",
|
||
"096": "ABD/Kanada", "097": "ABD/Kanada", "098": "ABD/Kanada", "099": "ABD/Kanada",
|
||
"100": "ABD/Kanada", "101": "ABD/Kanada", "102": "ABD/Kanada", "103": "ABD/Kanada",
|
||
"104": "ABD/Kanada", "105": "ABD/Kanada", "106": "ABD/Kanada", "107": "ABD/Kanada",
|
||
"108": "ABD/Kanada", "109": "ABD/Kanada", "110": "ABD/Kanada", "111": "ABD/Kanada",
|
||
"112": "ABD/Kanada", "113": "ABD/Kanada", "114": "ABD/Kanada", "115": "ABD/Kanada",
|
||
"116": "ABD/Kanada", "117": "ABD/Kanada", "118": "ABD/Kanada", "119": "ABD/Kanada",
|
||
"120": "ABD/Kanada", "121": "ABD/Kanada", "122": "ABD/Kanada", "123": "ABD/Kanada",
|
||
"124": "ABD/Kanada", "125": "ABD/Kanada", "126": "ABD/Kanada", "127": "ABD/Kanada",
|
||
"128": "ABD/Kanada", "129": "ABD/Kanada", "130": "ABD/Kanada", "131": "ABD/Kanada",
|
||
"132": "ABD/Kanada", "133": "ABD/Kanada", "134": "ABD/Kanada", "135": "ABD/Kanada",
|
||
"136": "ABD/Kanada", "137": "ABD/Kanada", "138": "ABD/Kanada", "139": "ABD/Kanada",
|
||
}
|
||
|
||
def get_country_from_barcode(barcode: str) -> str:
|
||
"""
|
||
Extract country from barcode prefix (first 3 digits)
|
||
Returns country name or 'Bilinmeyen' if not found
|
||
"""
|
||
if not barcode or len(barcode) < 3:
|
||
return "Bilinmeyen"
|
||
|
||
prefix = barcode[:3]
|
||
return BARCODE_PREFIX_TO_COUNTRY.get(prefix, "Bilinmeyen")
|
||
|
||
app = FastAPI(title="Trendyol Admin API", version="1.0.0")
|
||
|
||
# Base directory for resolving relative paths
|
||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||
|
||
# File paths - configurable via environment variables for Docker deployment
|
||
# Local development: uses relative paths from backend directory
|
||
# Docker: uses /data/* directories mounted as volumes
|
||
REPORTS_DIR = os.getenv("REPORTS_DIR", os.path.join(BASE_DIR, "..", "reports"))
|
||
CATEGORIES_DIR = os.getenv("CATEGORIES_DIR", os.path.join(BASE_DIR, "..", "categories"))
|
||
DATABASE_PATH = os.getenv("DATABASE_PATH", os.path.join(BASE_DIR, "trendyol.db"))
|
||
|
||
# CORS for React admin panel
|
||
# Security: Specify exact origins instead of wildcard
|
||
# Supports: Local development, Docker Compose, and production deployment
|
||
allowed_origins = [
|
||
# Local development (Vite dev server)
|
||
"http://localhost:5173",
|
||
"http://localhost:5174",
|
||
"http://localhost:5175",
|
||
"http://localhost:5176",
|
||
"http://localhost:3000",
|
||
"http://127.0.0.1:5173",
|
||
"http://127.0.0.1:5174",
|
||
"http://127.0.0.1:5175",
|
||
"http://127.0.0.1:5176",
|
||
"http://127.0.0.1:3000",
|
||
# Docker Compose internal networking
|
||
"http://frontend",
|
||
"http://frontend:80",
|
||
# Docker host access (mapped ports)
|
||
"http://localhost:80",
|
||
"http://localhost:8080",
|
||
"http://127.0.0.1:80",
|
||
"http://127.0.0.1:8080",
|
||
# Production server (Coolify)
|
||
"http://194.187.253.230:3010",
|
||
"http://194.187.253.230",
|
||
# Coolify Traefik proxy (sslip.io)
|
||
"http://trendyol.194.187.253.230.sslip.io",
|
||
"http://trendyol-api.194.187.253.230.sslip.io",
|
||
]
|
||
|
||
# Add production domain from environment variable
|
||
frontend_url = os.getenv("FRONTEND_URL")
|
||
if frontend_url:
|
||
allowed_origins.append(frontend_url)
|
||
# Also add https variant if http is provided
|
||
if frontend_url.startswith("http://"):
|
||
allowed_origins.append(frontend_url.replace("http://", "https://"))
|
||
|
||
app.add_middleware(
|
||
CORSMiddleware,
|
||
allow_origins=allowed_origins,
|
||
allow_credentials=True,
|
||
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
||
allow_headers=["Content-Type", "Authorization"],
|
||
)
|
||
|
||
|
||
# Dependency to get DB session
|
||
def get_db():
|
||
db = SessionLocal()
|
||
try:
|
||
yield db
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
# ============================================================================
|
||
# HEALTH CHECK ENDPOINT (Docker/Coolify)
|
||
# ============================================================================
|
||
|
||
@app.get("/health")
|
||
async def health_check():
|
||
"""
|
||
Health check endpoint for Docker HEALTHCHECK and Coolify monitoring.
|
||
Returns service status, database connectivity, and storage availability.
|
||
"""
|
||
try:
|
||
# Check database connection
|
||
db_path = os.getenv("DATABASE_PATH", os.path.join(BASE_DIR, "trendyol.db"))
|
||
db_exists = os.path.exists(db_path)
|
||
|
||
# Check data directories
|
||
categories_dir = CATEGORIES_DIR
|
||
reports_dir = os.getenv("REPORTS_DIR", REPORTS_DIR)
|
||
|
||
return {
|
||
"status": "healthy",
|
||
"timestamp": datetime.utcnow().isoformat(),
|
||
"database": {
|
||
"exists": db_exists,
|
||
"path": db_path
|
||
},
|
||
"storage": {
|
||
"categories": os.path.exists(categories_dir),
|
||
"reports": os.path.exists(reports_dir)
|
||
},
|
||
"version": "1.0.0"
|
||
}
|
||
except Exception as e:
|
||
raise HTTPException(
|
||
status_code=503,
|
||
detail=f"Health check failed: {str(e)}"
|
||
)
|
||
|
||
|
||
# ============================================================================
|
||
# TRENDYOL API HELPER FUNCTIONS
|
||
# ============================================================================
|
||
|
||
TRENDYOL_HEADERS = {
|
||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
||
"Accept": "application/json",
|
||
"Referer": "https://www.trendyol.com/",
|
||
"Connection": "keep-alive"
|
||
}
|
||
|
||
# Bounded cache with TTL to prevent memory leaks
|
||
class BoundedCache:
|
||
"""Thread-safe cache with size limit and TTL"""
|
||
def __init__(self, maxsize=100, ttl=3600):
|
||
self.cache = OrderedDict()
|
||
self.maxsize = maxsize
|
||
self.ttl = ttl
|
||
self.lock = Lock()
|
||
self.timestamps = {}
|
||
|
||
def get(self, key):
|
||
with self.lock:
|
||
if key not in self.cache:
|
||
return None
|
||
# Check TTL
|
||
if time.time() - self.timestamps.get(key, 0) > self.ttl:
|
||
del self.cache[key]
|
||
del self.timestamps[key]
|
||
return None
|
||
# Move to end (LRU)
|
||
self.cache.move_to_end(key)
|
||
return self.cache[key]
|
||
|
||
def set(self, key, value):
|
||
with self.lock:
|
||
if key in self.cache:
|
||
self.cache.move_to_end(key)
|
||
self.cache[key] = value
|
||
self.timestamps[key] = time.time()
|
||
# Evict oldest if over limit
|
||
if len(self.cache) > self.maxsize:
|
||
oldest = next(iter(self.cache))
|
||
del self.cache[oldest]
|
||
del self.timestamps[oldest]
|
||
|
||
def __contains__(self, key):
|
||
"""Support 'in' operator for cache key checking"""
|
||
with self.lock:
|
||
if key not in self.cache:
|
||
return False
|
||
# Check TTL
|
||
if time.time() - self.timestamps.get(key, 0) > self.ttl:
|
||
del self.cache[key]
|
||
del self.timestamps[key]
|
||
return False
|
||
return True
|
||
|
||
# Bounded caches for external API aggregations (prevent memory leaks)
|
||
reviews_cache = BoundedCache(maxsize=100, ttl=3600)
|
||
social_proof_cache = BoundedCache(maxsize=100, ttl=3600)
|
||
scraping_progress = BoundedCache(maxsize=50, ttl=7200)
|
||
dashboard_cache = BoundedCache(maxsize=50, ttl=3600)
|
||
enrichment_progress = BoundedCache(maxsize=50, ttl=7200)
|
||
|
||
# DISABLED: Questions, similar products, and followers features removed per user request
|
||
# questions_cache = {}
|
||
# similar_cache = {}
|
||
# followers_cache = {}
|
||
|
||
def _chunked(seq, size):
|
||
for i in range(0, len(seq), size):
|
||
yield seq[i:i+size]
|
||
|
||
def _safe_int(val):
|
||
try:
|
||
pass
|
||
# Convert abbreviated counts like "10B+" if present
|
||
s = str(val)
|
||
if s.endswith("B+"):
|
||
return int(s[:-2]) * 1_000_000_000
|
||
if s.endswith("M+"):
|
||
return int(s[:-2]) * 1_000_000
|
||
if s.endswith("K+"):
|
||
return int(s[:-2]) * 1_000
|
||
return int(float(s))
|
||
except Exception:
|
||
return 0
|
||
|
||
|
||
# ----------------------------------------------------------------------------
|
||
# Path validation helper (security: prevent path traversal)
|
||
# ----------------------------------------------------------------------------
|
||
import os
|
||
from pathlib import Path
|
||
|
||
# Define safe base directories
|
||
BASE_DIR = Path(__file__).parent.parent
|
||
SAFE_DIRS = {
|
||
"categories": BASE_DIR / "categories",
|
||
"reports": BASE_DIR / "reports",
|
||
"incoming": BASE_DIR / "incoming"
|
||
}
|
||
|
||
def validate_safe_path(filepath: str, directory_type: str) -> Path:
|
||
"""
|
||
Validate that a file path is within allowed directory.
|
||
Prevents path traversal attacks.
|
||
|
||
Args:
|
||
filepath: Relative or absolute file path
|
||
directory_type: One of 'categories', 'reports', 'incoming'
|
||
|
||
Returns:
|
||
Resolved Path object
|
||
|
||
Raises:
|
||
ValueError: If path is outside allowed directory or invalid type
|
||
"""
|
||
base_dir = SAFE_DIRS.get(directory_type)
|
||
if not base_dir:
|
||
raise ValueError(f"Invalid directory type: {directory_type}")
|
||
|
||
# Convert to Path and resolve
|
||
if os.path.isabs(filepath):
|
||
resolved_path = Path(filepath).resolve()
|
||
else:
|
||
resolved_path = (base_dir / filepath).resolve()
|
||
|
||
# Ensure path is within allowed directory
|
||
try:
|
||
resolved_path.relative_to(base_dir)
|
||
except ValueError:
|
||
raise ValueError(f"Path traversal detected: {filepath}")
|
||
|
||
return resolved_path
|
||
|
||
|
||
# ----------------------------------------------------------------------------
|
||
# Robust HTTP helper: shared session, retries, rate limiting
|
||
# ----------------------------------------------------------------------------
|
||
from requests.adapters import HTTPAdapter
|
||
from urllib3.util.retry import Retry
|
||
import threading
|
||
|
||
_retry_strategy = Retry(
|
||
total=5, # More retries to handle rate limiting
|
||
connect=3,
|
||
read=3,
|
||
backoff_factor=2.0, # Longer backoff: 2s, 4s, 8s, 16s, 32s
|
||
status_forcelist=[429, 500, 502, 503, 504],
|
||
allowed_methods=["HEAD", "GET", "OPTIONS"]
|
||
)
|
||
|
||
_http_adapter = HTTPAdapter(max_retries=_retry_strategy, pool_connections=100, pool_maxsize=200)
|
||
_session = requests.Session()
|
||
_session.headers.update(TRENDYOL_HEADERS)
|
||
_session.mount("https://", _http_adapter)
|
||
_DEFAULT_TIMEOUT = 30 # Longer timeout to avoid premature failures
|
||
|
||
|
||
class _RateLimiter:
|
||
def __init__(self, rate_per_sec: float):
|
||
self.min_interval = 1.0 / max(rate_per_sec, 0.1)
|
||
self._lock = threading.Lock()
|
||
self._next_time = 0.0
|
||
|
||
def wait(self):
|
||
with self._lock:
|
||
now = time.monotonic()
|
||
if self._next_time <= now:
|
||
self._next_time = now + self.min_interval
|
||
return
|
||
sleep_for = self._next_time - now
|
||
self._next_time += self.min_interval
|
||
# small jitter to avoid bursts
|
||
time.sleep(max(0, sleep_for) + random.uniform(0.0, 0.05))
|
||
|
||
|
||
_trendyol_limiter = _RateLimiter(rate_per_sec=5.0) # 0.2 seconds between requests (Optimized for localhost - 10x faster!)
|
||
|
||
|
||
# Circuit Breaker for Social Proof endpoint
|
||
class _CircuitBreaker:
|
||
def __init__(self, failure_threshold: int = 3, reset_timeout: float = 300.0):
|
||
self.failure_threshold = failure_threshold
|
||
self.reset_timeout = reset_timeout
|
||
self._failures = 0
|
||
self._opened_at = None
|
||
self._lock = threading.Lock()
|
||
|
||
def is_open(self) -> bool:
|
||
"""Check if circuit is open (blocking requests)"""
|
||
with self._lock:
|
||
if self._opened_at is None:
|
||
return False
|
||
# Auto-reset after timeout
|
||
if time.monotonic() - self._opened_at > self.reset_timeout:
|
||
self._failures = 0
|
||
self._opened_at = None
|
||
return False
|
||
return True
|
||
|
||
def record_failure(self):
|
||
"""Record a failure and potentially open the circuit"""
|
||
with self._lock:
|
||
self._failures += 1
|
||
if self._failures >= self.failure_threshold and self._opened_at is None:
|
||
self._opened_at = time.monotonic()
|
||
|
||
def record_success(self):
|
||
"""Record a success and reset the circuit"""
|
||
with self._lock:
|
||
self._failures = 0
|
||
self._opened_at = None
|
||
|
||
def get_status(self) -> dict:
|
||
"""Get current circuit status"""
|
||
with self._lock:
|
||
if self._opened_at is None:
|
||
return {"status": "closed", "failures": self._failures}
|
||
time_since_open = time.monotonic() - self._opened_at
|
||
time_until_reset = max(0, self.reset_timeout - time_since_open)
|
||
return {
|
||
"status": "open",
|
||
"failures": self._failures,
|
||
"time_until_reset": round(time_until_reset, 1)
|
||
}
|
||
|
||
|
||
_social_proof_breaker = _CircuitBreaker(failure_threshold=3, reset_timeout=300.0)
|
||
|
||
|
||
def _http_get(url: str, params: dict) -> requests.Response:
|
||
"""GET with shared session, retry, timeout, and rate limiting."""
|
||
_trendyol_limiter.wait()
|
||
return _session.get(url, params=params, timeout=_DEFAULT_TIMEOUT)
|
||
|
||
|
||
from typing import Optional as _Optional
|
||
|
||
def _log_enrichment_error(db: Session, *, report_id: _Optional[int], product_id: _Optional[int], merchant_id: _Optional[int],
|
||
endpoint: str, error_type: str, message: str, status_code: _Optional[int] = None, attempt: int = 1):
|
||
try:
|
||
db.add(EnrichmentError(
|
||
report_id=report_id,
|
||
product_id=product_id,
|
||
merchant_id=merchant_id,
|
||
endpoint=endpoint,
|
||
error_type=error_type,
|
||
message=message[:400] if message else None,
|
||
status_code=status_code,
|
||
attempt=attempt
|
||
))
|
||
db.commit()
|
||
except Exception:
|
||
pass
|
||
# Avoid crashing on logging failures
|
||
db.rollback()
|
||
|
||
def load_report_products(db: Session, report_id: int):
|
||
"""Load all product dicts for a report from stored category JSON files."""
|
||
import json, os
|
||
from collections import defaultdict
|
||
report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
if not report.json_file_path or not os.path.exists(report.json_file_path):
|
||
raise HTTPException(status_code=404, detail="Report data file not found")
|
||
try:
|
||
with open(report.json_file_path, 'r', encoding='utf-8') as f:
|
||
report_data = json.load(f)
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=f"Error reading report file: {str(e)}")
|
||
|
||
all_products = []
|
||
categories_data = defaultdict(list)
|
||
for detail in report_data.get("details", []):
|
||
if detail.get("success") and detail.get("file_path"):
|
||
file_path = detail["file_path"]
|
||
if os.path.exists(file_path):
|
||
try:
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
cat_data = json.load(f)
|
||
products = cat_data.get("products", [])
|
||
file_category_name = cat_data.get("category_name", detail.get("category_name", ""))
|
||
|
||
# Add file-level category_name to each product if product's category is empty
|
||
for product in products:
|
||
# Check if product has category field
|
||
product_category = product.get("category", {})
|
||
if isinstance(product_category, dict):
|
||
# If category.name is empty or missing, use file-level category_name
|
||
if not product_category.get("name"):
|
||
product["category"] = {
|
||
"id": product_category.get("id", 0),
|
||
"name": file_category_name
|
||
}
|
||
elif not product_category:
|
||
# If category field doesn't exist, create it
|
||
product["category"] = {
|
||
"id": 0,
|
||
"name": file_category_name
|
||
}
|
||
|
||
all_products.extend(products)
|
||
categories_data[detail["category_name"]].extend(products)
|
||
except Exception:
|
||
continue
|
||
return all_products, categories_data
|
||
|
||
|
||
# Persistent cache helpers
|
||
def _ensure_dir(path: str):
|
||
import os
|
||
os.makedirs(path, exist_ok=True)
|
||
|
||
def _save_json(path: str, data):
|
||
import json
|
||
_ensure_dir(os.path.dirname(path))
|
||
with open(path, 'w', encoding='utf-8') as f:
|
||
json.dump(data, f, ensure_ascii=False, indent=2)
|
||
|
||
def _load_json(path: str):
|
||
import json, os
|
||
if os.path.exists(path):
|
||
with open(path, 'r', encoding='utf-8') as f:
|
||
return json.load(f)
|
||
return None
|
||
|
||
def fetch_product_reviews(product_id: int, page: int = 0, page_size: int = 5):
|
||
"""Ürün yorumlarını çeker"""
|
||
url = "https://apigw.trendyol.com/discovery-storefront-trproductgw-service/api/review-read/product-reviews/detailed"
|
||
|
||
params = {
|
||
"contentId": product_id,
|
||
"page": page,
|
||
"pageSize": page_size,
|
||
"culture": "tr-TR",
|
||
"showReviewWithSizeFeedback": True,
|
||
"channelId": 1
|
||
}
|
||
|
||
try:
|
||
resp = _http_get(url, params)
|
||
if resp.status_code == 200:
|
||
return resp.json()
|
||
except Exception as e:
|
||
print(f"Review API error for product {product_id}: {e}")
|
||
return None
|
||
|
||
|
||
def fetch_social_proof(product_ids: list):
|
||
"""Sosyal kanıt verilerini çeker (görüntülenme, sipariş sayıları)"""
|
||
url = "https://apigw.trendyol.com/discovery-storefront-trproductgw-service/api/social-proof/"
|
||
|
||
params = {
|
||
"contentIds": ",".join(map(str, product_ids)),
|
||
"culture": "tr-TR",
|
||
"channelId": 1
|
||
}
|
||
|
||
try:
|
||
resp = _http_get(url, params)
|
||
if resp.status_code == 200:
|
||
data = resp.json()
|
||
# API doğrudan product ID'leri key olarak döndürüyor
|
||
# Convert to list format for easier processing
|
||
result = []
|
||
for product_id_str, product_data in data.items():
|
||
product_id = int(product_id_str)
|
||
social_proofs = product_data.get("socialProofs", [])
|
||
|
||
# Parse social proof metrics
|
||
metrics = {"contentId": product_id}
|
||
for proof in social_proofs:
|
||
proof_id = proof.get("id", "")
|
||
count_str = proof.get("count", "0")
|
||
|
||
# Convert count string to integer (e.g., "10B+" -> 10000)
|
||
count = _parse_social_count(count_str)
|
||
|
||
if proof_id == "page-view-count":
|
||
metrics["pageViewCount"] = count
|
||
elif proof_id == "order-count":
|
||
metrics["orderCount"] = count
|
||
elif proof_id == "basket-count":
|
||
metrics["basketCount"] = count
|
||
elif proof_id == "favorite-count":
|
||
metrics["favoriteCount"] = count
|
||
|
||
result.append(metrics)
|
||
|
||
return {"result": result} if result else data
|
||
except Exception as e:
|
||
print(f"Social Proof API error: {e}")
|
||
return None
|
||
|
||
|
||
def _parse_social_count(count_str: str) -> int:
|
||
"""Parse social proof count string like '10B+', '75,2B' to integer"""
|
||
if not count_str:
|
||
return 0
|
||
|
||
try:
|
||
pass
|
||
# Remove '+' and replace ',' with '.'
|
||
clean = count_str.replace('+', '').replace(',', '.')
|
||
|
||
# Check if it has 'B' (bin = thousand in Turkish)
|
||
if 'B' in clean or 'b' in clean:
|
||
pass
|
||
# '10B' -> 10000, '75.2B' -> 75200
|
||
num = float(clean.replace('B', '').replace('b', ''))
|
||
return int(num * 1000)
|
||
|
||
# Try to parse as float
|
||
return int(float(clean))
|
||
except:
|
||
return 0
|
||
|
||
|
||
def fetch_merchant_questions(product_id: int, page: int = 0, page_size: int = 4):
|
||
"""Satıcı sorularını çeker"""
|
||
url = f"https://apigw.trendyol.com/discovery-pdp-websfxmerchantquestions-santral/{product_id}/questions/answered/filter"
|
||
|
||
params = {
|
||
"excludeTag": False,
|
||
"page": page,
|
||
"size": page_size,
|
||
"culture": "tr-TR",
|
||
"channelId": 1
|
||
}
|
||
|
||
try:
|
||
resp = _http_get(url, params)
|
||
if resp.status_code == 200:
|
||
return resp.json()
|
||
except Exception as e:
|
||
print(f"Merchant Questions API error for product {product_id}: {e}")
|
||
return None
|
||
|
||
|
||
def fetch_similar_products(product_id: int, page: int = 0, page_size: int = 8):
|
||
"""Benzer ürünleri çeker"""
|
||
url = f"https://apigw.trendyol.com/discovery-storefront-trproductgw-service/api/reco-products/{product_id}"
|
||
|
||
params = {
|
||
"page": page,
|
||
"size": page_size,
|
||
"stampType": "TypeA",
|
||
"isDynamicRendering": False,
|
||
"culture": "tr-TR",
|
||
"recoType": "similar",
|
||
"channelId": 1
|
||
}
|
||
|
||
try:
|
||
resp = _http_get(url, params)
|
||
if resp.status_code == 200:
|
||
return resp.json()
|
||
except Exception as e:
|
||
print(f"Similar Products API error for product {product_id}: {e}")
|
||
return None
|
||
|
||
|
||
def fetch_merchant_followers(merchant_id: int):
|
||
"""Satıcı takipçi sayısını çeker"""
|
||
url = f"https://apigw.trendyol.com/discovery-pdp-websfxsellerstorefollow-santral/{merchant_id}/follower-count"
|
||
|
||
params = {
|
||
"culture": "tr-TR",
|
||
"channelId": 1
|
||
}
|
||
|
||
try:
|
||
resp = _http_get(url, params)
|
||
if resp.status_code == 200:
|
||
return resp.json()
|
||
except Exception as e:
|
||
print(f"Merchant Followers API error for merchant {merchant_id}: {e}")
|
||
return None
|
||
|
||
|
||
# Pydantic models for API
|
||
class CategoryBase(BaseModel):
|
||
name: str
|
||
parent_id: Optional[int] = None
|
||
trendyol_category_id: Optional[int] = None
|
||
trendyol_url: Optional[str] = None
|
||
is_active: bool = True
|
||
|
||
|
||
class CategoryCreate(CategoryBase):
|
||
pass
|
||
|
||
|
||
class CategoryUpdate(BaseModel):
|
||
name: Optional[str] = None
|
||
parent_id: Optional[int] = None
|
||
trendyol_category_id: Optional[int] = None
|
||
trendyol_url: Optional[str] = None
|
||
is_active: Optional[bool] = None
|
||
|
||
|
||
class CategoryResponse(CategoryBase):
|
||
id: int
|
||
created_at: datetime
|
||
children_count: int = 0
|
||
|
||
class Config:
|
||
from_attributes = True
|
||
|
||
|
||
class SnapshotResponse(BaseModel):
|
||
id: int
|
||
category_id: int
|
||
snapshot_month: str
|
||
total_products: int
|
||
avg_price: int
|
||
json_file_path: Optional[str]
|
||
scraped_at: datetime
|
||
|
||
class Config:
|
||
from_attributes = True
|
||
|
||
|
||
# Root endpoint
|
||
@app.get("/")
|
||
def root():
|
||
return {
|
||
"message": "Trendyol Admin API",
|
||
"version": "1.0.0",
|
||
"endpoints": {
|
||
"categories": "/categories",
|
||
"category_detail": "/categories/{id}",
|
||
"category_children": "/categories/{id}/children",
|
||
"main_categories": "/categories/main",
|
||
"snapshots": "/snapshots"
|
||
}
|
||
}
|
||
|
||
|
||
# Get all categories
|
||
@app.get("/categories", response_model=List[CategoryResponse])
|
||
def get_categories(db: Session = Depends(get_db), skip: int = 0, limit: int = 200):
|
||
"""Get all categories with pagination (OPTIMIZED: single query for children counts)"""
|
||
categories = db.query(Category).offset(skip).limit(limit).all()
|
||
|
||
# OPTIMIZATION: Get all children counts in a single query using GROUP BY
|
||
children_counts_query = db.query(
|
||
Category.parent_id,
|
||
func.count(Category.id).label('count')
|
||
).filter(Category.parent_id.isnot(None)).group_by(Category.parent_id).all()
|
||
|
||
# Convert to dict for fast lookup
|
||
children_counts = {parent_id: count for parent_id, count in children_counts_query}
|
||
|
||
result = []
|
||
for cat in categories:
|
||
# Fast O(1) lookup instead of N database queries
|
||
children_count = children_counts.get(cat.id, 0)
|
||
|
||
cat_dict = {
|
||
"id": cat.id,
|
||
"name": cat.name,
|
||
"parent_id": cat.parent_id,
|
||
"trendyol_category_id": cat.trendyol_category_id,
|
||
"trendyol_url": cat.trendyol_url,
|
||
"is_active": cat.is_active,
|
||
"created_at": cat.created_at,
|
||
"children_count": children_count
|
||
}
|
||
result.append(CategoryResponse(**cat_dict))
|
||
|
||
return result
|
||
|
||
|
||
# Get main categories (no parent)
|
||
@app.get("/categories/main", response_model=List[CategoryResponse])
|
||
def get_main_categories(db: Session = Depends(get_db)):
|
||
"""Get only main categories (parent_id is NULL) - OPTIMIZED"""
|
||
categories = db.query(Category).filter(Category.parent_id == None).all()
|
||
|
||
# OPTIMIZATION: Get all children counts in a single query
|
||
children_counts_query = db.query(
|
||
Category.parent_id,
|
||
func.count(Category.id).label('count')
|
||
).filter(Category.parent_id.isnot(None)).group_by(Category.parent_id).all()
|
||
|
||
children_counts = {parent_id: count for parent_id, count in children_counts_query}
|
||
|
||
result = []
|
||
for cat in categories:
|
||
# Fast O(1) lookup
|
||
children_count = children_counts.get(cat.id, 0)
|
||
|
||
cat_dict = {
|
||
"id": cat.id,
|
||
"name": cat.name,
|
||
"parent_id": cat.parent_id,
|
||
"trendyol_category_id": cat.trendyol_category_id,
|
||
"trendyol_url": cat.trendyol_url,
|
||
"is_active": cat.is_active,
|
||
"created_at": cat.created_at,
|
||
"children_count": children_count
|
||
}
|
||
result.append(CategoryResponse(**cat_dict))
|
||
|
||
return result
|
||
|
||
|
||
# Get category by ID
|
||
@app.get("/categories/{category_id}", response_model=CategoryResponse)
|
||
def get_category(category_id: int, db: Session = Depends(get_db)):
|
||
"""Get a specific category by ID"""
|
||
category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Count children manually
|
||
children_count = db.query(Category).filter(Category.parent_id == category.id).count()
|
||
|
||
cat_dict = {
|
||
"id": category.id,
|
||
"name": category.name,
|
||
"parent_id": category.parent_id,
|
||
"trendyol_category_id": category.trendyol_category_id,
|
||
"trendyol_url": category.trendyol_url,
|
||
"is_active": category.is_active,
|
||
"created_at": category.created_at,
|
||
"children_count": children_count
|
||
}
|
||
|
||
return CategoryResponse(**cat_dict)
|
||
|
||
|
||
# Get category children (subcategories)
|
||
@app.get("/categories/{category_id}/children", response_model=List[CategoryResponse])
|
||
def get_category_children(category_id: int, db: Session = Depends(get_db)):
|
||
"""Get all subcategories of a category - OPTIMIZED"""
|
||
# Verify parent exists
|
||
parent = db.query(Category).filter(Category.id == category_id).first()
|
||
if not parent:
|
||
raise HTTPException(status_code=404, detail="Parent category not found")
|
||
|
||
# Get children
|
||
children = db.query(Category).filter(Category.parent_id == category_id).all()
|
||
|
||
# OPTIMIZATION: Get all children counts in a single query
|
||
children_counts_query = db.query(
|
||
Category.parent_id,
|
||
func.count(Category.id).label('count')
|
||
).filter(Category.parent_id.isnot(None)).group_by(Category.parent_id).all()
|
||
|
||
children_counts = {parent_id: count for parent_id, count in children_counts_query}
|
||
|
||
result = []
|
||
for cat in children:
|
||
# Fast O(1) lookup
|
||
children_count = children_counts.get(cat.id, 0)
|
||
|
||
cat_dict = {
|
||
"id": cat.id,
|
||
"name": cat.name,
|
||
"parent_id": cat.parent_id,
|
||
"trendyol_category_id": cat.trendyol_category_id,
|
||
"trendyol_url": cat.trendyol_url,
|
||
"is_active": cat.is_active,
|
||
"created_at": cat.created_at,
|
||
"children_count": children_count
|
||
}
|
||
result.append(CategoryResponse(**cat_dict))
|
||
|
||
return result
|
||
|
||
|
||
# Create new category
|
||
@app.post("/categories", response_model=CategoryResponse, status_code=201)
|
||
def create_category(category: CategoryCreate, db: Session = Depends(get_db)):
|
||
"""Create a new category"""
|
||
# Verify parent exists if parent_id provided
|
||
if category.parent_id:
|
||
parent = db.query(Category).filter(Category.id == category.parent_id).first()
|
||
if not parent:
|
||
raise HTTPException(status_code=404, detail="Parent category not found")
|
||
|
||
# Create new category
|
||
db_category = Category(
|
||
name=category.name,
|
||
parent_id=category.parent_id,
|
||
trendyol_category_id=category.trendyol_category_id,
|
||
trendyol_url=category.trendyol_url,
|
||
is_active=category.is_active
|
||
)
|
||
|
||
db.add(db_category)
|
||
db.commit()
|
||
db.refresh(db_category)
|
||
|
||
cat_dict = {
|
||
"id": db_category.id,
|
||
"name": db_category.name,
|
||
"parent_id": db_category.parent_id,
|
||
"trendyol_category_id": db_category.trendyol_category_id,
|
||
"trendyol_url": db_category.trendyol_url,
|
||
"is_active": db_category.is_active,
|
||
"created_at": db_category.created_at,
|
||
"children_count": 0
|
||
}
|
||
|
||
return CategoryResponse(**cat_dict)
|
||
|
||
|
||
# Update category
|
||
@app.put("/categories/{category_id}", response_model=CategoryResponse)
|
||
def update_category(category_id: int, category: CategoryUpdate, db: Session = Depends(get_db)):
|
||
"""Update an existing category"""
|
||
db_category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not db_category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Update only provided fields
|
||
if category.name is not None:
|
||
db_category.name = category.name
|
||
if category.parent_id is not None:
|
||
if category.parent_id > 0:
|
||
parent = db.query(Category).filter(Category.id == category.parent_id).first()
|
||
if not parent:
|
||
raise HTTPException(status_code=404, detail="Parent category not found")
|
||
db_category.parent_id = category.parent_id if category.parent_id > 0 else None
|
||
if category.trendyol_category_id is not None:
|
||
db_category.trendyol_category_id = category.trendyol_category_id
|
||
if category.trendyol_url is not None:
|
||
db_category.trendyol_url = category.trendyol_url
|
||
if category.is_active is not None:
|
||
db_category.is_active = category.is_active
|
||
|
||
db.commit()
|
||
db.refresh(db_category)
|
||
|
||
# Count children manually
|
||
children_count = db.query(Category).filter(Category.parent_id == db_category.id).count()
|
||
|
||
cat_dict = {
|
||
"id": db_category.id,
|
||
"name": db_category.name,
|
||
"parent_id": db_category.parent_id,
|
||
"trendyol_category_id": db_category.trendyol_category_id,
|
||
"trendyol_url": db_category.trendyol_url,
|
||
"is_active": db_category.is_active,
|
||
"created_at": db_category.created_at,
|
||
"children_count": children_count
|
||
}
|
||
|
||
return CategoryResponse(**cat_dict)
|
||
|
||
|
||
# Delete category
|
||
@app.delete("/categories/{category_id}")
|
||
def delete_category(category_id: int, db: Session = Depends(get_db)):
|
||
"""Delete a category"""
|
||
db_category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not db_category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Check if category has children
|
||
children_count = db.query(Category).filter(Category.parent_id == db_category.id).count()
|
||
if children_count > 0:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail="Cannot delete category with subcategories. Delete children first."
|
||
)
|
||
|
||
db.delete(db_category)
|
||
db.commit()
|
||
|
||
return {"message": "Category deleted successfully", "id": category_id}
|
||
|
||
|
||
# Bulk import categories with hierarchy
|
||
class BulkCategoryItem(BaseModel):
|
||
name: str
|
||
parent_name: Optional[str] = None
|
||
trendyol_category_id: Optional[int] = None
|
||
trendyol_url: Optional[str] = None
|
||
|
||
class BulkCategoryImport(BaseModel):
|
||
categories: List[BulkCategoryItem]
|
||
clear_existing: bool = False
|
||
|
||
@app.post("/categories/bulk-import")
|
||
def bulk_import_categories(data: BulkCategoryImport, db: Session = Depends(get_db)):
|
||
"""Bulk import categories with hierarchy support.
|
||
Categories are processed in order: parent categories should come before children.
|
||
Uses parent_name to establish parent-child relationships."""
|
||
|
||
if data.clear_existing:
|
||
db.query(Category).delete()
|
||
db.commit()
|
||
|
||
name_to_id = {}
|
||
created = 0
|
||
errors = []
|
||
|
||
for item in data.categories:
|
||
parent_id = None
|
||
if item.parent_name:
|
||
parent_id = name_to_id.get(item.parent_name)
|
||
if parent_id is None:
|
||
# Try to find by name in DB
|
||
parent = db.query(Category).filter(Category.name == item.parent_name).first()
|
||
if parent:
|
||
parent_id = parent.id
|
||
name_to_id[item.parent_name] = parent.id
|
||
else:
|
||
errors.append(f"Parent '{item.parent_name}' not found for '{item.name}'")
|
||
continue
|
||
|
||
db_cat = Category(
|
||
name=item.name,
|
||
parent_id=parent_id,
|
||
trendyol_category_id=item.trendyol_category_id,
|
||
trendyol_url=item.trendyol_url,
|
||
is_active=True
|
||
)
|
||
db.add(db_cat)
|
||
db.flush()
|
||
name_to_id[item.name] = db_cat.id
|
||
created += 1
|
||
|
||
db.commit()
|
||
return {
|
||
"message": f"Imported {created} categories",
|
||
"created": created,
|
||
"errors": errors[:20],
|
||
"error_count": len(errors)
|
||
}
|
||
|
||
|
||
# Get all snapshots
|
||
@app.get("/snapshots", response_model=List[SnapshotResponse])
|
||
def get_snapshots(db: Session = Depends(get_db), skip: int = 0, limit: int = 100):
|
||
"""Get all snapshots with pagination"""
|
||
snapshots = db.query(Snapshot).offset(skip).limit(limit).all()
|
||
return snapshots
|
||
|
||
|
||
# Get snapshots for a category
|
||
@app.get("/categories/{category_id}/snapshots", response_model=List[SnapshotResponse])
|
||
def get_category_snapshots(category_id: int, db: Session = Depends(get_db)):
|
||
"""Get all snapshots for a specific category"""
|
||
# Verify category exists
|
||
category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
snapshots = db.query(Snapshot).filter(Snapshot.category_id == category_id).all()
|
||
return snapshots
|
||
|
||
|
||
# Get products for a category from JSON file
|
||
@app.get("/categories/{category_id}/products")
|
||
def get_category_products(category_id: int, db: Session = Depends(get_db)):
|
||
"""Get products from category JSON file"""
|
||
import json
|
||
import os
|
||
|
||
# Get category from DB
|
||
category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Build JSON file path
|
||
trendyol_id = category.trendyol_category_id
|
||
if not trendyol_id:
|
||
raise HTTPException(status_code=400, detail="Category has no Trendyol ID")
|
||
|
||
# Search for JSON file in categories folder
|
||
categories_dir = CATEGORIES_DIR
|
||
json_files = [f for f in os.listdir(categories_dir) if f.endswith(f"_{trendyol_id}.json")]
|
||
|
||
if not json_files:
|
||
raise HTTPException(status_code=404, detail=f"No JSON file found for category {trendyol_id}")
|
||
|
||
json_path = os.path.join(categories_dir, json_files[0])
|
||
|
||
# Read JSON file
|
||
try:
|
||
with open(json_path, 'r', encoding='utf-8') as f:
|
||
data = json.load(f)
|
||
|
||
return {
|
||
"category_id": category_id,
|
||
"category_name": category.name,
|
||
"trendyol_category_id": trendyol_id,
|
||
"total_products": len(data.get("products", [])),
|
||
"products": data.get("products", [])
|
||
}
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=f"Error reading JSON file: {str(e)}")
|
||
|
||
|
||
# Scraping endpoint
|
||
@app.post("/api/scrape/category/{category_id}")
|
||
def scrape_category_data(category_id: int, db: Session = Depends(get_db)):
|
||
"""
|
||
Scrape all subcategories of a main category
|
||
"""
|
||
from scraper import scrape_multiple_categories
|
||
|
||
# Get main category
|
||
main_category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not main_category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Get all subcategories
|
||
sub_categories = db.query(Category).filter(Category.parent_id == category_id).all()
|
||
|
||
if not sub_categories:
|
||
raise HTTPException(status_code=404, detail="No subcategories found")
|
||
|
||
# Prepare category list for scraper
|
||
categories_to_scrape = [
|
||
(sub_cat.trendyol_category_id, sub_cat.name)
|
||
for sub_cat in sub_categories
|
||
if sub_cat.trendyol_category_id # Only scrape if has Trendyol ID
|
||
]
|
||
|
||
if not categories_to_scrape:
|
||
raise HTTPException(status_code=400, detail="No valid Trendyol IDs found")
|
||
|
||
# Start scraping
|
||
results = scrape_multiple_categories(categories_to_scrape, delay=2.0)
|
||
|
||
# Create snapshots for successful scrapes
|
||
for detail in results["details"]:
|
||
if detail["success"]:
|
||
pass
|
||
# Find the category in DB
|
||
sub_cat = db.query(Category).filter(
|
||
Category.trendyol_category_id == detail["category_id"]
|
||
).first()
|
||
|
||
if sub_cat:
|
||
pass
|
||
# Create snapshot
|
||
snapshot = Snapshot(
|
||
category_id=sub_cat.id,
|
||
snapshot_month=datetime.now().strftime("%Y-%m"),
|
||
total_products=detail["total_products"],
|
||
avg_price=0, # Calculate from products if needed
|
||
json_file_path=detail["file_path"],
|
||
scraped_at=datetime.now()
|
||
)
|
||
db.add(snapshot)
|
||
|
||
db.commit()
|
||
|
||
return {
|
||
"main_category": main_category.name,
|
||
"total_subcategories": len(categories_to_scrape),
|
||
"successful": results["successful"],
|
||
"failed": results["failed"],
|
||
"total_products": results["total_products"],
|
||
"details": results["details"]
|
||
}
|
||
|
||
|
||
# Report Models
|
||
class ReportCreate(BaseModel):
|
||
name: str
|
||
category_id: int
|
||
subcategory_ids: Optional[List[int]] = None # Optional: If provided, create report only for these subcategories
|
||
|
||
|
||
class ReportUpdate(BaseModel):
|
||
name: str # Only name can be updated
|
||
|
||
|
||
class ReportResponse(BaseModel):
|
||
id: int
|
||
name: str
|
||
category_id: int
|
||
category_name: str
|
||
total_products: int
|
||
total_subcategories: int
|
||
json_file_path: Optional[str]
|
||
html_file_path: Optional[str]
|
||
created_at: datetime
|
||
|
||
class Config:
|
||
from_attributes = True
|
||
|
||
|
||
# Get all reports
|
||
@app.get("/api/reports", response_model=List[ReportResponse])
|
||
def get_reports(db: Session = Depends(get_db)):
|
||
"""Get all saved reports"""
|
||
from sqlalchemy.orm import joinedload
|
||
reports = db.query(Report).options(joinedload(Report.category)).order_by(Report.created_at.desc()).all()
|
||
|
||
result = []
|
||
for report in reports:
|
||
result.append({
|
||
"id": report.id,
|
||
"name": report.name,
|
||
"category_id": report.category_id,
|
||
"category_name": report.category.name if report.category else "Unknown",
|
||
"total_products": report.total_products,
|
||
"total_subcategories": report.total_subcategories,
|
||
"json_file_path": report.json_file_path,
|
||
"html_file_path": report.html_file_path,
|
||
"created_at": report.created_at
|
||
})
|
||
|
||
return result
|
||
|
||
|
||
# Get single report
|
||
@app.get("/api/reports/create")
|
||
async def create_report(
|
||
name: str,
|
||
category_id: int,
|
||
subcategory_ids: Optional[str] = None,
|
||
db: Session = Depends(get_db)
|
||
):
|
||
"""
|
||
Create a new report with real-time progress streaming (SSE)
|
||
SYNCHRONOUS: Report only saved when 100% complete
|
||
Accepts GET request for EventSource compatibility
|
||
"""
|
||
# print(f"🔍 DEBUG - Received request:")
|
||
print(f" - name: {name}")
|
||
print(f" - category_id: {category_id}")
|
||
print(f" - subcategory_ids (raw): {subcategory_ids}")
|
||
|
||
# Parse subcategory_ids if provided
|
||
parsed_subcategory_ids = None
|
||
if subcategory_ids:
|
||
try:
|
||
parsed_subcategory_ids = json_module.loads(subcategory_ids)
|
||
print(f" - subcategory_ids (parsed): {parsed_subcategory_ids}")
|
||
except Exception as e:
|
||
print(f" - ❌ Error parsing subcategory_ids: {e}")
|
||
parsed_subcategory_ids = None
|
||
|
||
# Get main category
|
||
main_category = db.query(Category).filter(Category.id == category_id).first()
|
||
if not main_category:
|
||
raise HTTPException(status_code=404, detail="Category not found")
|
||
|
||
# Check if specific subcategories were requested
|
||
if parsed_subcategory_ids and len(parsed_subcategory_ids) > 0:
|
||
pass
|
||
# Get only the specified subcategories
|
||
sub_categories = db.query(Category).filter(Category.id.in_(parsed_subcategory_ids)).all()
|
||
|
||
if len(sub_categories) != len(parsed_subcategory_ids):
|
||
raise HTTPException(status_code=404, detail="One or more subcategories not found")
|
||
|
||
# Verify that all subcategories belong to the main category
|
||
for sub_cat in sub_categories:
|
||
if sub_cat.parent_id != category_id:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"Subcategory '{sub_cat.name}' does not belong to the selected main category"
|
||
)
|
||
else:
|
||
pass
|
||
# Get all subcategories
|
||
sub_categories = db.query(Category).filter(Category.parent_id == category_id).all()
|
||
if not sub_categories:
|
||
raise HTTPException(status_code=404, detail="No subcategories found")
|
||
|
||
# Prepare category list for scraper
|
||
categories_to_scrape = [
|
||
(sub_cat.trendyol_category_id, sub_cat.name)
|
||
for sub_cat in sub_categories
|
||
if sub_cat.trendyol_category_id
|
||
]
|
||
|
||
if not categories_to_scrape:
|
||
raise HTTPException(status_code=400, detail="No valid Trendyol IDs found")
|
||
|
||
# Generate unique task ID
|
||
task_id = str(uuid.uuid4())
|
||
|
||
# Stream progress with SSE
|
||
async def progress_stream():
|
||
"""Generator that yields real-time progress events"""
|
||
try:
|
||
pass
|
||
# Send initial info
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': f'📂 {main_category.name} kategorisi seçildi', 'progress': 0})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 {len(categories_to_scrape)} alt kategori bulundu', 'progress': 0})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': '🚀 Veri çekimi başlatılıyor...', 'progress': 5})}\n\n"
|
||
await asyncio.sleep(0.5)
|
||
|
||
# Start synchronous scraping with progress updates
|
||
from scraper import TrendyolScraper
|
||
import json
|
||
import os
|
||
from datetime import datetime
|
||
import time
|
||
|
||
results = {
|
||
"successful": 0,
|
||
"failed": 0,
|
||
"total_products": 0,
|
||
"details": []
|
||
}
|
||
|
||
# Scrape each category with real-time updates
|
||
for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1):
|
||
progress = int((idx / len(categories_to_scrape)) * 80) + 10
|
||
|
||
yield f"data: {json_module.dumps({'type': 'processing', 'message': f'🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...', 'progress': progress, 'current': idx, 'total': len(categories_to_scrape)})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
try:
|
||
pass
|
||
# API call notification
|
||
yield f"data: {json_module.dumps({'type': 'api', 'message': f'🌐 API: Trendyol Best Seller - Kategori ID: {cat_id}', 'progress': progress})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
scraper = TrendyolScraper(cat_id)
|
||
products = scraper.fetch_all_products()
|
||
|
||
if products:
|
||
pass
|
||
# Save to file
|
||
os.makedirs(CATEGORIES_DIR, exist_ok=True)
|
||
filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json"
|
||
|
||
data = {
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"total_products": len(products),
|
||
"scraped_at": datetime.now().isoformat(),
|
||
"products": products
|
||
}
|
||
|
||
with open(filename, 'w', encoding='utf-8') as f:
|
||
json.dump(data, f, ensure_ascii=False, indent=2)
|
||
|
||
results["successful"] += 1
|
||
results["total_products"] += len(products)
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": True,
|
||
"total_products": len(products),
|
||
"file_path": filename
|
||
})
|
||
|
||
yield f"data: {json_module.dumps({'type': 'success', 'message': f'✅ {cat_name} tamamlandı - {len(products)} ürün bulundu', 'progress': progress})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
else:
|
||
results["failed"] += 1
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": False,
|
||
"total_products": 0,
|
||
"file_path": None
|
||
})
|
||
yield f"data: {json_module.dumps({'type': 'warning', 'message': f'⚠️ {cat_name} - Ürün bulunamadı', 'progress': progress})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
except Exception as e:
|
||
results["failed"] += 1
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": False,
|
||
"total_products": 0,
|
||
"file_path": None
|
||
})
|
||
yield f"data: {json_module.dumps({'type': 'error', 'message': f'❌ {cat_name} - Hata: {str(e)}', 'progress': progress})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
# Rate limiting
|
||
time.sleep(2.0)
|
||
|
||
# ============================================
|
||
# Sosyal Kanıt Verilerini Topla
|
||
# ============================================
|
||
# print(f"\n🔍 DEBUG: Sosyal kanıt toplama bölümüne ulaşıldı")
|
||
# print(f"🔍 DEBUG: results['details'] uzunluğu: {len(results.get('details', []))}")
|
||
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': '📊 Sosyal kanıt verileri toplanıyor...', 'progress': 85})}\n\n"
|
||
await asyncio.sleep(0.5)
|
||
|
||
# Collect all product IDs from scraped data AND product info
|
||
all_product_ids = []
|
||
product_info_map = {} # Map product_id to product info (name, image, url, category)
|
||
for detail in results["details"]:
|
||
if detail["success"] and detail["file_path"]:
|
||
category_name = detail.get("category_name", "Bilinmeyen Kategori")
|
||
try:
|
||
with open(detail["file_path"], 'r', encoding='utf-8') as f:
|
||
cat_data = json.load(f)
|
||
products = cat_data.get("products", [])
|
||
# print(f"🔍 DEBUG: {detail['file_path']} dosyasından {len(products)} ürün bulundu")
|
||
for product in products:
|
||
product_id = product.get("id")
|
||
if product_id:
|
||
all_product_ids.append(int(product_id))
|
||
# Extract rating data
|
||
rating_score_obj = product.get("ratingScore", {})
|
||
rating = rating_score_obj.get("averageRating", 0) if isinstance(rating_score_obj, dict) else 0
|
||
rating_count = rating_score_obj.get("totalCount", 0) if isinstance(rating_score_obj, dict) else 0
|
||
|
||
# Extract barcode from first variant
|
||
barcode = ""
|
||
merchant_listings = product.get("merchantListings", [])
|
||
if merchant_listings and len(merchant_listings) > 0:
|
||
variants = merchant_listings[0].get("variants", [])
|
||
if variants and len(variants) > 0:
|
||
barcode = variants[0].get("barcode", "")
|
||
|
||
# Store product info with category, brand, price, rating, and barcode
|
||
product_info_map[str(product_id)] = {
|
||
"name": product.get("name", ""),
|
||
"imageUrl": product.get("imageUrl", ""),
|
||
"url": product.get("url", ""),
|
||
"category": category_name,
|
||
"brand": product.get("brand", {}).get("name", "Bilinmeyen Marka"),
|
||
"price": product.get("price", {}).get("sellingPrice", 0),
|
||
"rating": round(rating, 2) if rating else 0,
|
||
"rating_count": rating_count,
|
||
"barcode": barcode,
|
||
"barcode_country": get_country_from_barcode(barcode), # Extract country from barcode prefix
|
||
"origin_country": "Bilinmeyen" # Not available in product data
|
||
}
|
||
except Exception as e:
|
||
pass
|
||
# print(f"⚠️ DEBUG: Dosya okuma hatası {detail['file_path']}: {str(e)}")
|
||
pass
|
||
|
||
# Collect social proof data in batches
|
||
social_proof_data = {}
|
||
total_products = len(all_product_ids)
|
||
processed = 0
|
||
batch_size = 5
|
||
|
||
# print(f"🔍 DEBUG: Toplam {total_products} ürün ID'si toplandı")
|
||
# print(f"🔍 DEBUG: İlk 5 ürün ID'si: {all_product_ids[:5] if all_product_ids else 'YOK'}")
|
||
|
||
if total_products > 0:
|
||
pass
|
||
# print(f"✅ DEBUG: total_products > 0 koşulu sağlandı, sosyal kanıt toplama başlıyor")
|
||
for chunk in _chunked(all_product_ids, batch_size):
|
||
try:
|
||
pass
|
||
# print(f"🔍 DEBUG: {len(chunk)} ürün için sosyal kanıt API'ye istek gönderiliyor: {chunk}")
|
||
data = fetch_social_proof(chunk)
|
||
# print(f"🔍 DEBUG: API yanıtı alındı: {type(data)}, 'result' var mı: {'result' in data if data else False}")
|
||
if data and "result" in data:
|
||
items = data.get("result", [])
|
||
# print(f"🔍 DEBUG: {len(items)} adet sonuç bulundu")
|
||
for item in items:
|
||
pid = item.get("contentId")
|
||
if pid:
|
||
pid_str = str(pid)
|
||
# Get product info from map
|
||
product_info = product_info_map.get(pid_str, {})
|
||
social_proof_data[pid_str] = {
|
||
"page_views": item.get("pageViewCount", 0),
|
||
"orders": item.get("orderCount", 0),
|
||
"baskets": item.get("basketCount", 0),
|
||
"favorites": item.get("favoriteCount", 0),
|
||
"name": product_info.get("name", ""),
|
||
"imageUrl": product_info.get("imageUrl", ""),
|
||
"url": product_info.get("url", ""),
|
||
"category": product_info.get("category", "Bilinmeyen Kategori"),
|
||
"brand": product_info.get("brand", "Bilinmeyen Marka"),
|
||
"price": product_info.get("price", 0),
|
||
"rating": product_info.get("rating", 0),
|
||
"rating_count": product_info.get("rating_count", 0),
|
||
"barcode": product_info.get("barcode", ""),
|
||
"origin_country": product_info.get("origin_country", "Bilinmeyen")
|
||
}
|
||
except Exception as e:
|
||
pass
|
||
# print(f"❌ DEBUG: Sosyal kanıt API hatası: {str(e)}")
|
||
pass
|
||
|
||
processed += len(chunk)
|
||
progress_pct = int((processed / total_products) * 5) + 85 # 85-90%
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': f'📊 Sosyal kanıt: {processed}/{total_products} ürün', 'progress': progress_pct})}\n\n"
|
||
await asyncio.sleep(0.02)
|
||
time.sleep(0.5) # Rate limiting
|
||
|
||
# print(f"✅ DEBUG: Sosyal kanıt toplama tamamlandı. Toplanan veri: {len(social_proof_data)} ürün")
|
||
yield f"data: {json_module.dumps({'type': 'success', 'message': f'✅ Sosyal kanıt verileri toplandı ({len(social_proof_data)} ürün)', 'progress': 90})}\n\n"
|
||
await asyncio.sleep(0.3)
|
||
else:
|
||
pass
|
||
# print(f"⚠️ DEBUG: total_products = 0, sosyal kanıt toplanmadı")
|
||
pass
|
||
|
||
# Generate report file
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': '📝 Rapor dosyası oluşturuluyor...', 'progress': 92})}\n\n"
|
||
await asyncio.sleep(0.5)
|
||
|
||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
safe_name = name.lower().replace(" ", "_").replace("ı", "i").replace("ş", "s").replace("ğ", "g").replace("ü", "u").replace("ö", "o").replace("ç", "c")
|
||
|
||
reports_dir = REPORTS_DIR
|
||
os.makedirs(reports_dir, exist_ok=True)
|
||
|
||
json_filename = f"{reports_dir}/{safe_name}_{timestamp}.json"
|
||
combined_data = {
|
||
"report_name": name,
|
||
"category": main_category.name,
|
||
"created_at": datetime.now().isoformat(),
|
||
"total_subcategories": len(categories_to_scrape),
|
||
"total_products": results["total_products"],
|
||
"details": results["details"]
|
||
}
|
||
|
||
with open(json_filename, 'w', encoding='utf-8') as f:
|
||
json.dump(combined_data, f, ensure_ascii=False, indent=2)
|
||
|
||
# Save to database
|
||
yield f"data: {json_module.dumps({'type': 'info', 'message': '💾 Veritabanına kaydediliyor...', 'progress': 95})}\n\n"
|
||
await asyncio.sleep(0.5)
|
||
|
||
new_report = Report(
|
||
name=name,
|
||
category_id=category_id,
|
||
total_products=results["total_products"],
|
||
total_subcategories=len(categories_to_scrape),
|
||
json_file_path=json_filename,
|
||
html_file_path=None,
|
||
created_at=datetime.now()
|
||
)
|
||
|
||
db.add(new_report)
|
||
db.commit()
|
||
db.refresh(new_report)
|
||
|
||
# Save social proof data to persistent cache
|
||
# print(f"\n🔍 DEBUG: Sosyal kanıt kaydetme bölümü - social_proof_data uzunluğu: {len(social_proof_data)}")
|
||
if social_proof_data:
|
||
enrich_dir = f"{REPORTS_DIR}/enrich_{new_report.id}"
|
||
os.makedirs(enrich_dir, exist_ok=True)
|
||
social_file = f"{enrich_dir}/social.json"
|
||
|
||
social_output = {
|
||
"products": len(all_product_ids),
|
||
"total": {
|
||
"page_views": sum(d.get("page_views", 0) for d in social_proof_data.values()),
|
||
"orders": sum(d.get("orders", 0) for d in social_proof_data.values()),
|
||
"baskets": sum(d.get("baskets", 0) for d in social_proof_data.values()),
|
||
"favorites": sum(d.get("favorites", 0) for d in social_proof_data.values())
|
||
},
|
||
"missing": total_products - len(social_proof_data),
|
||
"details": social_proof_data
|
||
}
|
||
|
||
# print(f"✅ DEBUG: Sosyal kanıt dosyası kaydediliyor: {social_file}")
|
||
# print(f"🔍 DEBUG: Toplam metrikler: {social_output['total']}")
|
||
with open(social_file, 'w', encoding='utf-8') as f:
|
||
json.dump(social_output, f, ensure_ascii=False, indent=2)
|
||
# print(f"✅ DEBUG: Sosyal kanıt dosyası başarıyla kaydedildi")
|
||
else:
|
||
pass
|
||
# print(f"⚠️ DEBUG: social_proof_data boş, dosya kaydedilmedi")
|
||
|
||
# Final success message with report ID
|
||
yield f"data: {json_module.dumps({'type': 'complete', 'message': '✅ Rapor başarıyla oluşturuldu!', 'progress': 100, 'report_id': new_report.id, 'total_products': results['total_products'], 'successful': results['successful']})}\n\n"
|
||
await asyncio.sleep(0.1)
|
||
|
||
except Exception as e:
|
||
yield f"data: {json_module.dumps({'type': 'error', 'message': f'❌ Kritik hata: {str(e)}', 'progress': -1})}\n\n"
|
||
|
||
return StreamingResponse(progress_stream(), media_type="text/event-stream")
|
||
|
||
|
||
# Update report
|
||
|
||
@app.get("/api/reports/{report_id}", response_model=ReportResponse)
|
||
def get_report(report_id: int, db: Session = Depends(get_db)):
|
||
"""Get a specific report by ID"""
|
||
report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
|
||
category = db.query(Category).filter(Category.id == report.category_id).first()
|
||
|
||
return {
|
||
"id": report.id,
|
||
"name": report.name,
|
||
"category_id": report.category_id,
|
||
"category_name": category.name if category else "Unknown",
|
||
"total_products": report.total_products,
|
||
"total_subcategories": report.total_subcategories,
|
||
"json_file_path": report.json_file_path,
|
||
"html_file_path": report.html_file_path,
|
||
"created_at": report.created_at
|
||
}
|
||
|
||
|
||
# Progress tracking (in-memory)
|
||
scraping_progress = {}
|
||
|
||
# Get scraping progress
|
||
@app.get("/api/reports/progress/{task_id}")
|
||
def get_scraping_progress(task_id: str):
|
||
"""Get real-time scraping progress"""
|
||
if task_id not in scraping_progress:
|
||
raise HTTPException(status_code=404, detail="Task not found")
|
||
|
||
return scraping_progress[task_id]
|
||
|
||
# Background task for scraping
|
||
def scrape_in_background(task_id: str, report_name: str, category_id: int, categories_to_scrape: list, category_name: str):
|
||
"""Background task that handles scraping with progress updates"""
|
||
from scraper import TrendyolScraper
|
||
import json
|
||
import os
|
||
from datetime import datetime
|
||
import time
|
||
|
||
# Initialize progress
|
||
scraping_progress[task_id] = {
|
||
"status": "running",
|
||
"progress": 0,
|
||
"current": 0,
|
||
"total": len(categories_to_scrape),
|
||
"current_category": "",
|
||
"logs": [],
|
||
"successful": 0,
|
||
"failed": 0,
|
||
"total_products": 0
|
||
}
|
||
|
||
def add_log(message: str, log_type: str = "info"):
|
||
"""Add log to progress"""
|
||
timestamp = datetime.now().strftime("%H:%M:%S")
|
||
scraping_progress[task_id]["logs"].append({
|
||
"timestamp": timestamp,
|
||
"message": message,
|
||
"type": log_type
|
||
})
|
||
|
||
add_log(f"📂 {category_name} kategorisi seçildi")
|
||
add_log(f"📊 {len(categories_to_scrape)} alt kategori bulundu")
|
||
add_log("🚀 Veri çekimi başlatılıyor...")
|
||
|
||
# Scrape each category
|
||
results = {
|
||
"successful": 0,
|
||
"failed": 0,
|
||
"total_products": 0,
|
||
"details": []
|
||
}
|
||
|
||
for idx, (cat_id, cat_name) in enumerate(categories_to_scrape, 1):
|
||
scraping_progress[task_id]["current"] = idx
|
||
scraping_progress[task_id]["current_category"] = cat_name
|
||
scraping_progress[task_id]["progress"] = int((idx / len(categories_to_scrape)) * 80) + 10
|
||
|
||
add_log(f"🔍 [{idx}/{len(categories_to_scrape)}] {cat_name} çekiliyor...")
|
||
|
||
try:
|
||
scraper = TrendyolScraper(cat_id)
|
||
products = scraper.fetch_all_products()
|
||
|
||
if products:
|
||
pass
|
||
# Save to file
|
||
os.makedirs(CATEGORIES_DIR, exist_ok=True)
|
||
filename = f"{CATEGORIES_DIR}/{cat_name.replace(' ', '_')}_{cat_id}.json"
|
||
|
||
data = {
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"total_products": len(products),
|
||
"scraped_at": datetime.now().isoformat(),
|
||
"products": products
|
||
}
|
||
|
||
with open(filename, 'w', encoding='utf-8') as f:
|
||
json.dump(data, f, ensure_ascii=False, indent=2)
|
||
|
||
results["successful"] += 1
|
||
results["total_products"] += len(products)
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": True,
|
||
"total_products": len(products),
|
||
"file_path": filename
|
||
})
|
||
|
||
scraping_progress[task_id]["successful"] += 1
|
||
scraping_progress[task_id]["total_products"] += len(products)
|
||
|
||
add_log(f"✅ {cat_name} tamamlandı - {len(products)} ürün", "success")
|
||
else:
|
||
results["failed"] += 1
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": False,
|
||
"total_products": 0,
|
||
"file_path": None
|
||
})
|
||
scraping_progress[task_id]["failed"] += 1
|
||
add_log(f"⚠️ {cat_name} - Ürün bulunamadı", "warning")
|
||
|
||
except Exception as e:
|
||
results["failed"] += 1
|
||
results["details"].append({
|
||
"category_id": cat_id,
|
||
"category_name": cat_name,
|
||
"success": False,
|
||
"total_products": 0,
|
||
"file_path": None
|
||
})
|
||
scraping_progress[task_id]["failed"] += 1
|
||
add_log(f"❌ {cat_name} - Hata: {str(e)}", "error")
|
||
|
||
# Rate limiting
|
||
time.sleep(2.0)
|
||
|
||
# Generate report file
|
||
scraping_progress[task_id]["progress"] = 90
|
||
add_log("📝 Rapor dosyası oluşturuluyor...")
|
||
|
||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
safe_name = report_name.lower().replace(" ", "_").replace("ı", "i").replace("ş", "s").replace("ğ", "g").replace("ü", "u").replace("ö", "o").replace("ç", "c")
|
||
|
||
reports_dir = REPORTS_DIR
|
||
os.makedirs(reports_dir, exist_ok=True)
|
||
|
||
json_filename = f"{reports_dir}/{safe_name}_{timestamp}.json"
|
||
combined_data = {
|
||
"report_name": report_name,
|
||
"category": category_name,
|
||
"created_at": datetime.now().isoformat(),
|
||
"total_subcategories": len(categories_to_scrape),
|
||
"total_products": results["total_products"],
|
||
"details": results["details"]
|
||
}
|
||
|
||
with open(json_filename, 'w', encoding='utf-8') as f:
|
||
json.dump(combined_data, f, ensure_ascii=False, indent=2)
|
||
|
||
# Save to database
|
||
scraping_progress[task_id]["progress"] = 95
|
||
add_log("💾 Veritabanına kaydediliyor...")
|
||
|
||
db = SessionLocal()
|
||
try:
|
||
new_report = Report(
|
||
name=report_name,
|
||
category_id=category_id,
|
||
total_products=results["total_products"],
|
||
total_subcategories=len(categories_to_scrape),
|
||
json_file_path=json_filename,
|
||
html_file_path=None,
|
||
created_at=datetime.now()
|
||
)
|
||
|
||
db.add(new_report)
|
||
db.commit()
|
||
db.refresh(new_report)
|
||
|
||
# Mark as completed
|
||
scraping_progress[task_id]["status"] = "completed"
|
||
scraping_progress[task_id]["progress"] = 100
|
||
scraping_progress[task_id]["report_id"] = new_report.id
|
||
add_log(f"✅ Rapor başarıyla oluşturuldu!", "success")
|
||
add_log(f"📊 Toplam {results['successful']} kategori, {results['total_products']} ürün", "success")
|
||
|
||
except Exception as e:
|
||
scraping_progress[task_id]["status"] = "failed"
|
||
add_log(f"❌ Veritabanı hatası: {str(e)}", "error")
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
# Create report with real-time SSE progress (SYNCHRONOUS)
|
||
@app.put("/api/reports/{report_id}")
|
||
def update_report(report_id: int, report: ReportUpdate, db: Session = Depends(get_db)):
|
||
"""Update report name"""
|
||
db_report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not db_report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
|
||
db_report.name = report.name
|
||
db.commit()
|
||
db.refresh(db_report)
|
||
|
||
return {"message": "Report updated successfully", "id": report_id, "name": report.name}
|
||
|
||
|
||
# Delete report
|
||
@app.delete("/api/reports/{report_id}")
|
||
def delete_report(report_id: int, db: Session = Depends(get_db)):
|
||
"""Delete a report"""
|
||
report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
|
||
# Delete files if exist
|
||
import os
|
||
import shutil
|
||
if report.json_file_path and os.path.exists(report.json_file_path):
|
||
os.remove(report.json_file_path)
|
||
if report.html_file_path and os.path.exists(report.html_file_path):
|
||
os.remove(report.html_file_path)
|
||
|
||
# Delete enrich directory if exists
|
||
enrich_dir = f"{REPORTS_DIR}/enrich_{report_id}"
|
||
if os.path.exists(enrich_dir):
|
||
shutil.rmtree(enrich_dir)
|
||
|
||
db.delete(report)
|
||
db.commit()
|
||
|
||
return {"message": "Report deleted successfully", "id": report_id}
|
||
|
||
|
||
# Get dashboard data for a report
|
||
# Cache for dashboard data (max 10 reports, 1 hour TTL)
|
||
from functools import lru_cache
|
||
from datetime import timedelta
|
||
import hashlib
|
||
|
||
dashboard_cache = {}
|
||
DASHBOARD_CACHE_TTL = 3600 # 1 hour in seconds
|
||
|
||
@app.get("/api/reports/{report_id}/dashboard-data")
|
||
def get_dashboard_data(report_id: int, db: Session = Depends(get_db)):
|
||
"""
|
||
Process report data and return dashboard KPIs and chart data (with caching)
|
||
"""
|
||
import json
|
||
import os
|
||
from collections import defaultdict
|
||
|
||
# Check cache first
|
||
cache_key = f"dashboard_{report_id}"
|
||
# TEMPORARILY DISABLED FOR DEBUGGING - Re-enable after fixing category sales
|
||
# if cache_key in dashboard_cache:
|
||
# cached_data, cached_time = dashboard_cache[cache_key]
|
||
# if time.time() - cached_time < DASHBOARD_CACHE_TTL:
|
||
# print(f"📊 Cache hit for report {report_id}")
|
||
# return cached_data
|
||
print(f"📊 Cache bypassed for debugging - recalculating dashboard data for report {report_id}")
|
||
|
||
# Get report from database
|
||
report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
|
||
# Read report JSON file
|
||
if not report.json_file_path or not os.path.exists(report.json_file_path):
|
||
raise HTTPException(status_code=404, detail="Report data file not found")
|
||
|
||
try:
|
||
with open(report.json_file_path, 'r', encoding='utf-8') as f:
|
||
report_data = json.load(f)
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=f"Error reading report file: {str(e)}")
|
||
|
||
# Load all product data from category files
|
||
all_products = []
|
||
categories_data = defaultdict(list)
|
||
brands_data = defaultdict(int)
|
||
|
||
for detail in report_data.get("details", []):
|
||
if detail.get("success") and detail.get("file_path"):
|
||
file_path = detail["file_path"]
|
||
if os.path.exists(file_path):
|
||
try:
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
cat_data = json.load(f)
|
||
products = cat_data.get("products", [])
|
||
|
||
# Enrich products with category name from report details
|
||
# Clean category name: remove trailing ID pattern (e.g., "Android Cep Telefonu 164461" → "Android Cep Telefonu")
|
||
cat_name_raw = detail["category_name"]
|
||
# Remove trailing space + numbers pattern
|
||
cat_name = re.sub(r'\s+\d+$', '', cat_name_raw)
|
||
|
||
for product in products:
|
||
# Update category field with actual name
|
||
if isinstance(product.get("category"), dict):
|
||
product["category"]["name"] = cat_name
|
||
else:
|
||
product["category"] = {"id": 0, "name": cat_name}
|
||
|
||
all_products.extend(products)
|
||
|
||
# Group by category
|
||
categories_data[cat_name].extend(products)
|
||
|
||
# Count brands
|
||
for product in products:
|
||
brand_name = product.get("brand", {}).get("name", "Unknown")
|
||
brands_data[brand_name] += 1
|
||
except:
|
||
continue
|
||
|
||
# Calculate KPIs
|
||
total_products = len(all_products)
|
||
total_brands = len(brands_data)
|
||
|
||
# Price calculations
|
||
prices = [p.get("price", {}).get("sellingPrice", 0) for p in all_products if p.get("price", {}).get("sellingPrice")]
|
||
avg_price = sum(prices) / len(prices) if prices else 0
|
||
min_price = min(prices) if prices else 0
|
||
max_price = max(prices) if prices else 0
|
||
|
||
# DISABLED: Discount calculations (not needed per user request)
|
||
# discounted_count = sum(1 for p in all_products if p.get("price", {}).get("discountedPrice") and p.get("price", {}).get("discountedPrice") < p.get("price", {}).get("originalPrice", 0))
|
||
# discount_rate = (discounted_count / total_products * 100) if total_products > 0 else 0
|
||
|
||
# DISABLED: Stock calculations (not needed per user request)
|
||
# in_stock_count = sum(1 for p in all_products if p.get("inStock", False))
|
||
# out_of_stock_count = total_products - in_stock_count
|
||
# running_out_count = sum(1 for p in all_products if p.get("isRunningOut", False))
|
||
|
||
# Rating calculations
|
||
ratings = []
|
||
for p in all_products:
|
||
rating = p.get("rating", 0)
|
||
# Handle if rating is a dict (ratingScore)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
if rating:
|
||
ratings.append(rating)
|
||
avg_rating = sum(ratings) / len(ratings) if ratings else 0
|
||
|
||
# DISABLED: Flash products and discount calculations (not needed per user request)
|
||
# flash_count = sum(1 for p in all_products if p.get("isFlash", False))
|
||
|
||
# Advanced KPIs
|
||
# DISABLED: 1. Discount Depth (average discount percentage for discounted products)
|
||
# discount_depths = []
|
||
# for p in all_products:
|
||
# original = p.get("price", {}).get("originalPrice", 0)
|
||
# discounted = p.get("price", {}).get("discountedPrice", 0)
|
||
# if original > 0 and discounted > 0 and discounted < original:
|
||
# discount_depths.append((original - discounted) / original * 100)
|
||
# avg_discount_depth = sum(discount_depths) / len(discount_depths) if discount_depths else 0
|
||
|
||
# 2. Median Price (for price premium index calculation) - DOĞRU HESAPLAMA
|
||
median_price = float(np.percentile(prices, 50)) if prices else 0
|
||
|
||
# DISABLED: 3. Stock Risk Metric (running_out / in_stock ratio) - not needed per user request
|
||
# stock_risk = (running_out_count / in_stock_count * 100) if in_stock_count > 0 else 0
|
||
|
||
# 4. Low Rating Products Count (rating < 3.0)
|
||
low_rating_count = sum(1 for r in ratings if r < 3.0)
|
||
low_rating_rate = (low_rating_count / len(ratings) * 100) if ratings else 0
|
||
|
||
# KPIs
|
||
kpis = {
|
||
"total_products": total_products,
|
||
"total_subcategories": report.total_subcategories,
|
||
"total_brands": total_brands,
|
||
"avg_price": round(avg_price, 2),
|
||
"median_price": round(median_price, 2),
|
||
# DISABLED: Discount-related KPIs (not needed per user request)
|
||
# "discounted_products": discounted_count,
|
||
# "discount_rate": round(discount_rate, 2),
|
||
# "avg_discount_depth": round(avg_discount_depth, 2),
|
||
# DISABLED: Stock-related KPIs (not needed per user request)
|
||
# "out_of_stock": out_of_stock_count,
|
||
# "in_stock": in_stock_count,
|
||
# "running_out": running_out_count,
|
||
# "stock_risk": round(stock_risk, 2),
|
||
"avg_rating": round(avg_rating, 2),
|
||
"low_rating_count": low_rating_count,
|
||
"low_rating_rate": round(low_rating_rate, 2),
|
||
# DISABLED: Flash products (not needed per user request)
|
||
# "flash_products": flash_count,
|
||
"min_price": round(min_price, 2),
|
||
"max_price": round(max_price, 2)
|
||
}
|
||
|
||
# Price distribution (for bar chart)
|
||
price_ranges = {
|
||
"0-100": 0,
|
||
"100-250": 0,
|
||
"250-500": 0,
|
||
"500-1000": 0,
|
||
"1000+": 0
|
||
}
|
||
for price in prices:
|
||
if price < 100:
|
||
price_ranges["0-100"] += 1
|
||
elif price < 250:
|
||
price_ranges["100-250"] += 1
|
||
elif price < 500:
|
||
price_ranges["250-500"] += 1
|
||
elif price < 1000:
|
||
price_ranges["500-1000"] += 1
|
||
else:
|
||
price_ranges["1000+"] += 1
|
||
|
||
# Top 10 categories by sales (orders from social proof data)
|
||
# First, try to get social proof data to calculate by sales
|
||
category_sales = {}
|
||
try:
|
||
pass
|
||
# Try to get social proof data - check for different batch sizes
|
||
# The social proof cache uses format: {report_id}:b{batch_size}
|
||
# Try common batch sizes: 100, 5 (default), 10, 20
|
||
social_data = None
|
||
for batch_size in [100, 5, 10, 20]:
|
||
social_cache_key = f"{report_id}:b{batch_size}"
|
||
if social_cache_key in social_proof_cache:
|
||
social_data = social_proof_cache.get(social_cache_key)
|
||
if social_data:
|
||
pass
|
||
# print(f"[DEBUG] Found social proof cache with batch_size={batch_size}")
|
||
break
|
||
|
||
# If not in cache, try loading from persisted JSON
|
||
if not social_data:
|
||
pass
|
||
# print(f"[DEBUG] No social proof cache found, trying persisted JSON")
|
||
persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
|
||
if persisted:
|
||
social_data = {
|
||
"details": persisted.get("details", {}),
|
||
"aggregation": {
|
||
"products": persisted.get("products", 0),
|
||
"total": persisted.get("total", {}),
|
||
"missing": persisted.get("missing", 0)
|
||
}
|
||
}
|
||
# Cache it for future use with batch_size=5 (default)
|
||
social_proof_cache.set(f"{report_id}:b5", social_data)
|
||
# print(f"[DEBUG] Loaded social proof data from JSON with {len(social_data['details'])} products")
|
||
else:
|
||
pass
|
||
# print(f"[DEBUG] No persisted social proof JSON found for report {report_id}")
|
||
|
||
if social_data:
|
||
social_details = social_data.get("details", {})
|
||
# print(f"[DEBUG] Found social data with {len(social_details)} products")
|
||
|
||
# Calculate sales per category
|
||
for cat_name, cat_products in categories_data.items():
|
||
total_orders = 0
|
||
for product in cat_products:
|
||
pid = product.get("id")
|
||
if pid and str(pid) in social_details:
|
||
product_orders = social_details[str(pid)].get("orders", 0)
|
||
total_orders += product_orders
|
||
if product_orders > 0:
|
||
pass
|
||
# print(f"[DEBUG] Product {pid} in {cat_name}: {product_orders} orders")
|
||
|
||
category_sales[cat_name] = {
|
||
"name": cat_name,
|
||
"count": len(cat_products),
|
||
"total_orders": total_orders
|
||
}
|
||
# print(f"[DEBUG] Category {cat_name}: {total_orders} total orders from {len(cat_products)} products")
|
||
|
||
# Sort by total_orders (sales)
|
||
top_categories = sorted(
|
||
category_sales.values(),
|
||
key=lambda x: x["total_orders"],
|
||
reverse=True
|
||
)[:20]
|
||
# print(f"[DEBUG] Top categories sorted by orders: {[(c['name'], c['total_orders']) for c in top_categories[:3]]}")
|
||
else:
|
||
pass
|
||
# Fallback: If no social proof data, sort by product count
|
||
top_categories = sorted(
|
||
[{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()],
|
||
key=lambda x: x["count"],
|
||
reverse=True
|
||
)[:20]
|
||
except Exception as e:
|
||
pass
|
||
# print(f"[DEBUG] Error calculating category sales: {str(e)}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
# Fallback: If any error, sort by product count
|
||
top_categories = sorted(
|
||
[{"name": cat, "count": len(products), "total_orders": 0} for cat, products in categories_data.items()],
|
||
key=lambda x: x["count"],
|
||
reverse=True
|
||
)[:20]
|
||
|
||
# Top 10 brands by product count
|
||
top_brands = sorted(
|
||
[{"name": brand, "count": count} for brand, count in brands_data.items()],
|
||
key=lambda x: x["count"],
|
||
reverse=True
|
||
)[:20]
|
||
|
||
# DISABLED: Stock status distribution (for pie chart) - not needed per user request
|
||
# stock_status = {
|
||
# "in_stock": in_stock_count,
|
||
# "out_of_stock": out_of_stock_count,
|
||
# "running_out": running_out_count
|
||
# }
|
||
|
||
# Rating distribution
|
||
rating_distribution = {
|
||
"0-1": 0,
|
||
"1-2": 0,
|
||
"2-3": 0,
|
||
"3-4": 0,
|
||
"4-5": 0
|
||
}
|
||
for product in all_products:
|
||
rating = product.get("rating", 0)
|
||
# Handle if rating is a dict (ratingScore)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
|
||
if rating < 1:
|
||
rating_distribution["0-1"] += 1
|
||
elif rating < 2:
|
||
rating_distribution["1-2"] += 1
|
||
elif rating < 3:
|
||
rating_distribution["2-3"] += 1
|
||
elif rating < 4:
|
||
rating_distribution["3-4"] += 1
|
||
else:
|
||
rating_distribution["4-5"] += 1
|
||
|
||
# Boxplot data (brand price statistics) - Top 10 brands
|
||
brand_price_stats = []
|
||
for brand_name in [b["name"] for b in top_brands[:10]]:
|
||
brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name]
|
||
brand_prices = [p.get("price", {}).get("sellingPrice", 0) for p in brand_products if p.get("price", {}).get("sellingPrice")]
|
||
|
||
if brand_prices and len(brand_prices) >= 4: # En az 4 veri noktası gerekli
|
||
# DOĞRU İSTATİSTİK: numpy percentile kullanımı
|
||
percentiles = np.percentile(brand_prices, [0, 25, 50, 75, 100])
|
||
brand_price_stats.append({
|
||
"brand": brand_name,
|
||
"min": round(float(percentiles[0]), 2),
|
||
"q1": round(float(percentiles[1]), 2),
|
||
"median": round(float(percentiles[2]), 2),
|
||
"q3": round(float(percentiles[3]), 2),
|
||
"max": round(float(percentiles[4]), 2),
|
||
"count": len(brand_prices)
|
||
})
|
||
|
||
# Scatter plot data (price vs rating) - Sample 500 products for performance
|
||
scatter_data = []
|
||
sample_size = min(500, len(all_products))
|
||
sampled_products = random.sample(all_products, sample_size)
|
||
|
||
for p in sampled_products:
|
||
price = p.get("price", {}).get("sellingPrice", 0)
|
||
rating = p.get("rating", 0)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
|
||
if price > 0 and rating > 0:
|
||
scatter_data.append({
|
||
"price": round(price, 2),
|
||
"rating": round(rating, 2),
|
||
"brand": p.get("brand", {}).get("name", "Unknown"),
|
||
"in_stock": p.get("inStock", False)
|
||
})
|
||
|
||
# Low rating products (rating < 3.0) - Top 20
|
||
low_rating_products = []
|
||
for p in all_products:
|
||
rating = p.get("rating", 0)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
|
||
if rating > 0 and rating < 3.0:
|
||
low_rating_products.append({
|
||
"name": p.get("name", "Unknown")[:50],
|
||
"brand": p.get("brand", {}).get("name", "Unknown"),
|
||
"rating": round(rating, 2),
|
||
"price": round(p.get("price", {}).get("sellingPrice", 0), 2),
|
||
"in_stock": p.get("inStock", False)
|
||
})
|
||
|
||
low_rating_products = sorted(low_rating_products, key=lambda x: x["rating"])[:20]
|
||
|
||
# Brand strength score (normalized metrics)
|
||
brand_strength_scores = []
|
||
for brand_name in [b["name"] for b in top_brands[:10]]:
|
||
brand_products = [p for p in all_products if p.get("brand", {}).get("name") == brand_name]
|
||
brand_count = len(brand_products)
|
||
brand_share = (brand_count / total_products * 100) if total_products > 0 else 0
|
||
|
||
# Brand ratings
|
||
brand_ratings = []
|
||
for p in brand_products:
|
||
rating = p.get("rating", 0)
|
||
if isinstance(rating, dict):
|
||
rating = rating.get("averageRating", 0)
|
||
if rating > 0:
|
||
brand_ratings.append(rating)
|
||
brand_avg_rating = sum(brand_ratings) / len(brand_ratings) if brand_ratings else 0
|
||
|
||
# Brand stockout rate
|
||
brand_out_of_stock = sum(1 for p in brand_products if not p.get("inStock", False))
|
||
brand_stockout_rate = (brand_out_of_stock / brand_count * 100) if brand_count > 0 else 0
|
||
|
||
# Simple strength score: share + rating - stockout_rate
|
||
strength_score = brand_share + (brand_avg_rating * 5) - brand_stockout_rate
|
||
|
||
brand_strength_scores.append({
|
||
"brand": brand_name,
|
||
"share": round(brand_share, 2),
|
||
"avg_rating": round(brand_avg_rating, 2),
|
||
"stockout_rate": round(brand_stockout_rate, 2),
|
||
"strength_score": round(strength_score, 2)
|
||
})
|
||
|
||
brand_strength_scores = sorted(brand_strength_scores, key=lambda x: x["strength_score"], reverse=True)
|
||
|
||
# Heatmap: Brand × Category Matrix (top 10 brands × top 10 categories)
|
||
# Get top 10 brands by product count
|
||
top_10_brands = [b["name"] for b in top_brands]
|
||
|
||
# Get top 10 categories by product count
|
||
top_10_categories = [c["name"] for c in top_categories[:10]]
|
||
|
||
# Build matrix: count products for each brand-category combination
|
||
heatmap_data = []
|
||
for cat_name in top_10_categories:
|
||
cat_products = categories_data.get(cat_name, [])
|
||
for brand_name in top_10_brands:
|
||
# Count products for this brand-category pair
|
||
count = sum(1 for p in cat_products
|
||
if p.get("brand", {}).get("name") == brand_name)
|
||
|
||
if count > 0: # Only include non-zero combinations
|
||
heatmap_data.append({
|
||
"brand": brand_name,
|
||
"category": cat_name,
|
||
"value": count
|
||
})
|
||
|
||
# Anomalies (outlier prices using IQR method) - DOĞRU HESAPLAMA
|
||
if len(prices) > 4:
|
||
q1, q3 = np.percentile(prices, [25, 75])
|
||
iqr = q3 - q1
|
||
lower_bound = q1 - 1.5 * iqr
|
||
upper_bound = q3 + 1.5 * iqr
|
||
|
||
anomalies = []
|
||
for p in all_products:
|
||
price = p.get("price", {}).get("sellingPrice", 0)
|
||
if price > 0 and (price < lower_bound or price > upper_bound):
|
||
anomalies.append({
|
||
"name": p.get("name", "Unknown")[:50],
|
||
"brand": p.get("brand", {}).get("name", "Unknown"),
|
||
"price": round(price, 2),
|
||
"type": "expensive" if price > upper_bound else "cheap"
|
||
})
|
||
|
||
anomalies = sorted(anomalies, key=lambda x: x["price"], reverse=True)[:20]
|
||
else:
|
||
anomalies = []
|
||
|
||
# Category-based Price Analysis (Price Premium/Discount relative to overall average)
|
||
category_price_analysis = []
|
||
overall_avg_price = avg_price # Genel ortalama fiyat
|
||
|
||
for cat_name, cat_products in categories_data.items():
|
||
# Her kategorinin ürün fiyatlarını topla
|
||
cat_prices = [p.get("price", {}).get("sellingPrice", 0) for p in cat_products
|
||
if p.get("price", {}).get("sellingPrice", 0) > 0]
|
||
|
||
if cat_prices:
|
||
cat_avg_price = sum(cat_prices) / len(cat_prices)
|
||
cat_median_price = float(np.percentile(cat_prices, 50))
|
||
|
||
# Fiyat primi hesaplama: (kategori_ort - genel_ort) / genel_ort * 100
|
||
price_premium = ((cat_avg_price - overall_avg_price) / overall_avg_price * 100) if overall_avg_price > 0 else 0
|
||
|
||
category_price_analysis.append({
|
||
"category": cat_name,
|
||
"avg_price": round(cat_avg_price, 2),
|
||
"median_price": round(cat_median_price, 2),
|
||
"price_premium": round(price_premium, 2),
|
||
"product_count": len(cat_prices),
|
||
"min_price": round(min(cat_prices), 2),
|
||
"max_price": round(max(cat_prices), 2)
|
||
})
|
||
|
||
# Fiyat primine göre sırala
|
||
category_price_analysis_sorted = sorted(category_price_analysis, key=lambda x: x["price_premium"], reverse=True)
|
||
|
||
# En pahalı 10 kategori (pozitif prim)
|
||
most_expensive_categories = [c for c in category_price_analysis_sorted if c["price_premium"] > 0][:10]
|
||
|
||
# En ucuz 10 kategori (negatif prim)
|
||
most_affordable_categories = [c for c in category_price_analysis_sorted if c["price_premium"] < 0][-10:]
|
||
most_affordable_categories.reverse() # En ucuzdan en pahalıya doğru sırala
|
||
|
||
# ============================================================================
|
||
# MENŞEİ ÜLKE VE BARKOD ANALİZİ
|
||
# ============================================================================
|
||
|
||
# Ülke kodlarını tam isimlere çeviren mapping
|
||
COUNTRY_NAMES = {
|
||
"TR": "Türkiye",
|
||
"CN": "Çin",
|
||
"US": "Amerika",
|
||
"GB": "İngiltere",
|
||
"FR": "Fransa",
|
||
"DE": "Almanya",
|
||
"IT": "İtalya",
|
||
"ES": "İspanya",
|
||
"KR": "Güney Kore",
|
||
"JP": "Japonya",
|
||
"IN": "Hindistan",
|
||
"TW": "Tayvan",
|
||
"HK": "Hong Kong",
|
||
"TH": "Tayland",
|
||
"VN": "Vietnam",
|
||
"PL": "Polonya",
|
||
"CZ": "Çek Cumhuriyeti",
|
||
"RO": "Romanya",
|
||
"BG": "Bulgaristan",
|
||
"GR": "Yunanistan",
|
||
"PT": "Portekiz",
|
||
"NL": "Hollanda",
|
||
"BE": "Belçika",
|
||
"CH": "İsviçre",
|
||
"AT": "Avusturya",
|
||
"SE": "İsveç",
|
||
"NO": "Norveç",
|
||
"DK": "Danimarka",
|
||
"FI": "Finlandiya",
|
||
"RU": "Rusya",
|
||
"UA": "Ukrayna",
|
||
"AE": "Birleşik Arap Emirlikleri",
|
||
"SA": "Suudi Arabistan",
|
||
"IL": "İsrail",
|
||
"EG": "Mısır",
|
||
"ZA": "Güney Afrika",
|
||
"BR": "Brezilya",
|
||
"MX": "Meksika",
|
||
"CA": "Kanada",
|
||
"AU": "Avustralya",
|
||
"NZ": "Yeni Zelanda",
|
||
"SG": "Singapur",
|
||
"MY": "Malezya",
|
||
"ID": "Endonezya",
|
||
"PH": "Filipinler",
|
||
"PK": "Pakistan",
|
||
"BD": "Bangladeş",
|
||
"AZ": "Azerbaycan",
|
||
}
|
||
|
||
# Barkod prefix'lerine göre ülke kodu mapping (EAN-13 standardı)
|
||
BARCODE_COUNTRIES = {
|
||
# Trendyol Özel Barkodlar (Harfli)
|
||
"TYB": "Trendyol (İç Barkod)",
|
||
"SGT": "Trendyol Satıcı",
|
||
"KPE": "Trendyol Kampanya",
|
||
"RTN": "Trendyol İade",
|
||
"CDM": "Trendyol Özel",
|
||
|
||
# EAN-13 Standart Barkodlar
|
||
"00-13": "ABD & Kanada",
|
||
"190-199": "Rezerve/Özel Kullanım",
|
||
"20-29": "Mağaza İçi Kullanım",
|
||
"30-37": "Fransa",
|
||
"380": "Bulgaristan",
|
||
"383": "Slovenya",
|
||
"370": "Litvanya",
|
||
"372": "Estonya",
|
||
"373": "Moldova",
|
||
"375": "Belarus",
|
||
"377": "Ermenistan",
|
||
"379": "Kazakistan",
|
||
"385": "Hırvatistan",
|
||
"387": "Bosna Hersek",
|
||
"400-440": "Almanya",
|
||
"45-49": "Japonya",
|
||
"50": "İngiltere",
|
||
"520-521": "Yunanistan",
|
||
"528": "Lübnan",
|
||
"529": "Kıbrıs",
|
||
"530": "Arnavutluk",
|
||
"531": "Makedonya",
|
||
"535": "Malta",
|
||
"539": "İrlanda",
|
||
"54": "Belçika & Lüksemburg",
|
||
"560": "Portekiz",
|
||
"569": "İzlanda",
|
||
"57": "Danimarka",
|
||
"590": "Polonya",
|
||
"594": "Romanya",
|
||
"599": "Macaristan",
|
||
"600-601": "Güney Afrika",
|
||
"603": "Gana",
|
||
"608": "Bahreyn",
|
||
"609": "Mauritius",
|
||
"611": "Fas",
|
||
"613": "Cezayir",
|
||
"615": "Nijerya",
|
||
"616": "Kenya",
|
||
"618": "Fildişi Sahili",
|
||
"619": "Tunus",
|
||
"621": "Suriye",
|
||
"622": "Mısır",
|
||
"624": "Libya",
|
||
"625": "Ürdün",
|
||
"626": "İran",
|
||
"627": "Kuveyt",
|
||
"628": "Suudi Arabistan",
|
||
"629": "BAE",
|
||
"630": "Katar",
|
||
"631": "Umman",
|
||
"64": "Finlandiya",
|
||
"690-699": "Çin",
|
||
"70": "Norveç",
|
||
"710-719": "Rezerve/Özel Kullanım",
|
||
"729": "İsrail",
|
||
"73": "İsveç",
|
||
"740": "Guatemala",
|
||
"741": "El Salvador",
|
||
"742": "Honduras",
|
||
"743": "Nikaragua",
|
||
"744": "Kosta Rika",
|
||
"745": "Panama",
|
||
"746": "Dominik Cumhuriyeti",
|
||
"750": "Meksika",
|
||
"754-755": "Kanada",
|
||
"759": "Venezuela",
|
||
"76": "İsviçre",
|
||
"770-771": "Kolombiya",
|
||
"773": "Uruguay",
|
||
"775": "Peru",
|
||
"777": "Bolivya",
|
||
"779": "Arjantin",
|
||
"780": "Şili",
|
||
"784": "Paraguay",
|
||
"786": "Ekvador",
|
||
"789-790": "Brezilya",
|
||
"80-83": "İtalya",
|
||
"84": "İspanya",
|
||
"850": "Küba",
|
||
"858": "Slovakya",
|
||
"859": "Çek Cumhuriyeti",
|
||
"860": "Sırbistan",
|
||
"865": "Moğolistan",
|
||
"867": "Kuzey Kore",
|
||
"868-869": "Türkiye",
|
||
"87": "Hollanda",
|
||
"880": "Güney Kore",
|
||
"884": "Kamboçya",
|
||
"885": "Tayland",
|
||
"888": "Singapur",
|
||
"890": "Hindistan",
|
||
"893": "Vietnam",
|
||
"896": "Pakistan",
|
||
"899": "Endonezya",
|
||
"90-91": "Avusturya",
|
||
"93": "Avustralya",
|
||
"94": "Yeni Zelanda",
|
||
"955": "Malezya",
|
||
"958": "Makao",
|
||
"977": "Süreli Yayınlar (ISSN)",
|
||
"978-979": "Kitaplar (ISBN)",
|
||
"980": "Para İade Kuponları",
|
||
"981-984": "Kuponlar",
|
||
"99": "Kuponlar",
|
||
}
|
||
|
||
# Menşei ülke verilerini topla
|
||
origin_countries = []
|
||
barcodes = []
|
||
products_with_origin = 0
|
||
products_with_barcode = 0
|
||
|
||
for cat_name, cat_products in categories_data.items():
|
||
for product in cat_products:
|
||
# Menşei ülke bilgisini çıkar
|
||
merchant_listings = product.get("merchantListings", [])
|
||
if merchant_listings and len(merchant_listings) > 0:
|
||
custom_values = merchant_listings[0].get("customValues", [])
|
||
for cv in custom_values:
|
||
if cv.get("key") == "origin":
|
||
country_code = cv.get("value", "").upper()
|
||
if country_code:
|
||
origin_countries.append(country_code)
|
||
products_with_origin += 1
|
||
break
|
||
|
||
# Barkod bilgisini çıkar
|
||
if merchant_listings and len(merchant_listings) > 0:
|
||
variants = merchant_listings[0].get("variants", [])
|
||
if variants and len(variants) > 0:
|
||
barcode = variants[0].get("barcode", "")
|
||
if barcode:
|
||
barcodes.append(barcode)
|
||
products_with_barcode += 1
|
||
|
||
# Menşei ülke analizi
|
||
origin_country_counts = {}
|
||
for country_code in origin_countries:
|
||
origin_country_counts[country_code] = origin_country_counts.get(country_code, 0) + 1
|
||
|
||
# Ülke kodlarını tam isimlere çevir ve sırala
|
||
origin_country_data = []
|
||
for code, count in origin_country_counts.items():
|
||
country_name = COUNTRY_NAMES.get(code, f"Diğer ({code})")
|
||
percentage = (count / products_with_origin * 100) if products_with_origin > 0 else 0
|
||
origin_country_data.append({
|
||
"country_code": code,
|
||
"country_name": country_name,
|
||
"product_count": count,
|
||
"percentage": round(percentage, 2)
|
||
})
|
||
|
||
origin_country_data_sorted = sorted(origin_country_data, key=lambda x: x["product_count"], reverse=True)
|
||
|
||
# Barkod prefix analizi (ilk 3 hane)
|
||
barcode_prefixes = {}
|
||
barcode_countries_detected = {}
|
||
|
||
for barcode in barcodes:
|
||
if len(barcode) >= 3:
|
||
prefix = barcode[:3]
|
||
barcode_prefixes[prefix] = barcode_prefixes.get(prefix, 0) + 1
|
||
|
||
# Prefix'ten ülke tespiti
|
||
detected_country = "Bilinmiyor"
|
||
prefix_num = barcode[:3]
|
||
|
||
# Tek prefix kontrolü
|
||
for key, country in BARCODE_COUNTRIES.items():
|
||
if "-" in key:
|
||
start, end = key.split("-")
|
||
# Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp)
|
||
try:
|
||
range_len = len(start)
|
||
prefix_to_check = prefix_num[:range_len] if len(prefix_num) >= range_len else prefix_num
|
||
prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1
|
||
start_int = int(start)
|
||
end_int = int(end)
|
||
if prefix_int >= start_int and prefix_int <= end_int:
|
||
detected_country = country
|
||
break
|
||
except ValueError:
|
||
continue
|
||
elif key == prefix_num[:len(key)]:
|
||
detected_country = country
|
||
break
|
||
|
||
barcode_countries_detected[detected_country] = barcode_countries_detected.get(detected_country, 0) + 1
|
||
|
||
# Barkod prefix'lerini sırala
|
||
barcode_prefix_data = []
|
||
for prefix, count in barcode_prefixes.items():
|
||
percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0
|
||
|
||
# Prefix'ten ülke bul
|
||
detected_country = "Bilinmiyor"
|
||
for key, country in BARCODE_COUNTRIES.items():
|
||
if "-" in key:
|
||
start, end = key.split("-")
|
||
# Sayısal karşılaştırma yap (aralık uzunluğuna göre prefix'i kırp)
|
||
try:
|
||
range_len = len(start)
|
||
prefix_to_check = prefix[:range_len] if len(prefix) >= range_len else prefix
|
||
prefix_int = int(prefix_to_check) if prefix_to_check.isdigit() else -1
|
||
start_int = int(start)
|
||
end_int = int(end)
|
||
if prefix_int >= start_int and prefix_int <= end_int:
|
||
detected_country = country
|
||
break
|
||
except ValueError:
|
||
continue
|
||
elif key == prefix[:len(key)]:
|
||
detected_country = country
|
||
break
|
||
|
||
barcode_prefix_data.append({
|
||
"prefix": prefix,
|
||
"detected_country": detected_country,
|
||
"product_count": count,
|
||
"percentage": round(percentage, 2)
|
||
})
|
||
|
||
barcode_prefix_data_sorted = sorted(barcode_prefix_data, key=lambda x: x["product_count"], reverse=True)[:20]
|
||
|
||
# Barkoddan tespit edilen ülkeleri sırala
|
||
barcode_country_data = []
|
||
for country, count in barcode_countries_detected.items():
|
||
percentage = (count / products_with_barcode * 100) if products_with_barcode > 0 else 0
|
||
barcode_country_data.append({
|
||
"country_name": country,
|
||
"product_count": count,
|
||
"percentage": round(percentage, 2)
|
||
})
|
||
|
||
barcode_country_data_sorted = sorted(barcode_country_data, key=lambda x: x["product_count"], reverse=True)
|
||
|
||
# ============================================================================
|
||
# SATICI ANALİZİ (MERCHANT ANALYSIS)
|
||
# ============================================================================
|
||
|
||
merchants_data = {} # merchant_id -> {total_products, total_price, winner_count}
|
||
total_winners = 0
|
||
products_with_merchant = 0
|
||
|
||
for product in all_products:
|
||
merchant_listings = product.get("merchantListings", [])
|
||
if merchant_listings:
|
||
ml = merchant_listings[0] # İlk satıcı
|
||
merchant = ml.get("merchant", {})
|
||
merchant_id = merchant.get("id")
|
||
|
||
if merchant_id:
|
||
products_with_merchant += 1
|
||
|
||
# Satıcı verilerini topla
|
||
if merchant_id not in merchants_data:
|
||
pass
|
||
# Satıcı ismini al, boşsa officialName'i kullan, o da boşsa ID'yi kullan
|
||
merchant_name = merchant.get("name") or merchant.get("officialName") or f"Satıcı {merchant_id}"
|
||
merchants_data[merchant_id] = {
|
||
"merchant_id": merchant_id,
|
||
"merchant_name": merchant_name,
|
||
"product_count": 0,
|
||
"total_price": 0,
|
||
"winner_count": 0
|
||
}
|
||
|
||
merchants_data[merchant_id]["product_count"] += 1
|
||
|
||
# Fiyat bilgisi
|
||
price = product.get("price", {}).get("sellingPrice", 0)
|
||
if price > 0:
|
||
merchants_data[merchant_id]["total_price"] += price
|
||
|
||
# Kazanan satıcı mı?
|
||
if ml.get("isWinner"):
|
||
merchants_data[merchant_id]["winner_count"] += 1
|
||
total_winners += 1
|
||
|
||
# Satıcı listesi oluştur
|
||
merchant_list = []
|
||
for merchant_id, data in merchants_data.items():
|
||
avg_price = data["total_price"] / data["product_count"] if data["product_count"] > 0 else 0
|
||
winner_ratio = (data["winner_count"] / data["product_count"] * 100) if data["product_count"] > 0 else 0
|
||
|
||
# Satıcı URL'sini oluştur
|
||
merchant_url = f"https://www.trendyol.com/magaza/{data['merchant_name'].lower().replace(' ', '-')}-m-{data['merchant_id']}" if data["merchant_name"] and data["merchant_name"] != f"Satıcı {data['merchant_id']}" else None
|
||
|
||
merchant_list.append({
|
||
"merchant_id": data["merchant_id"],
|
||
"merchant_name": data["merchant_name"],
|
||
"merchant_url": merchant_url,
|
||
"product_count": data["product_count"],
|
||
"avg_price": round(avg_price, 2),
|
||
"winner_count": data["winner_count"],
|
||
"winner_ratio": round(winner_ratio, 2)
|
||
})
|
||
|
||
# Ürün sayısına göre sırala
|
||
merchant_list_sorted = sorted(merchant_list, key=lambda x: x["product_count"], reverse=True)
|
||
top_merchants = merchant_list_sorted[:20]
|
||
|
||
# Genel satıcı istatistikleri
|
||
total_merchants = len(merchants_data)
|
||
winner_percentage = (total_winners / products_with_merchant * 100) if products_with_merchant > 0 else 0
|
||
|
||
# ============================================================================
|
||
# STOK MİKTAR ANALİZİ (STOCK QUANTITY ANALYSIS)
|
||
# ============================================================================
|
||
|
||
# DISABLED: Stock quantity analysis (not needed per user request)
|
||
# stock_quantities = []
|
||
# category_stocks = {} # category -> [quantities]
|
||
# products_with_stock_info = 0
|
||
# product_to_category = {} # product_id -> category_name mapping
|
||
|
||
# # Önce ürün-kategori eşleşmesini oluştur
|
||
# for cat_name, cat_products in categories_data.items():
|
||
# for product in cat_products:
|
||
# product_id = product.get("id")
|
||
# if product_id:
|
||
# product_to_category[product_id] = cat_name
|
||
|
||
# for product in all_products:
|
||
# merchant_listings = product.get("merchantListings", [])
|
||
# if merchant_listings:
|
||
# ml = merchant_listings[0]
|
||
# variants = ml.get("variants", [])
|
||
# if variants:
|
||
# quantity = variants[0].get("quantity")
|
||
# if quantity is not None and quantity > 0:
|
||
# stock_quantities.append(quantity)
|
||
# products_with_stock_info += 1
|
||
|
||
# # Kategori bazlı stok - mapping'den al
|
||
# product_id = product.get("id")
|
||
# cat_name = product_to_category.get(product_id, "Diğer")
|
||
|
||
# if cat_name not in category_stocks:
|
||
# category_stocks[cat_name] = []
|
||
# category_stocks[cat_name].append(quantity)
|
||
|
||
# # Stok istatistikleri
|
||
# if stock_quantities:
|
||
# avg_stock = sum(stock_quantities) / len(stock_quantities)
|
||
# median_stock = float(np.percentile(stock_quantities, 50))
|
||
# total_stock = sum(stock_quantities)
|
||
# min_stock = min(stock_quantities)
|
||
# max_stock = max(stock_quantities)
|
||
# else:
|
||
# avg_stock = median_stock = total_stock = min_stock = max_stock = 0
|
||
|
||
# # Kategori bazlı stok analizi
|
||
# category_stock_analysis = []
|
||
# for cat_name, quantities in category_stocks.items():
|
||
# cat_avg_stock = sum(quantities) / len(quantities) if quantities else 0
|
||
# cat_total_stock = sum(quantities)
|
||
|
||
# category_stock_analysis.append({
|
||
# "category": cat_name,
|
||
# "avg_stock": round(cat_avg_stock, 2),
|
||
# "total_stock": cat_total_stock,
|
||
# "product_count": len(quantities),
|
||
# "min_stock": min(quantities) if quantities else 0,
|
||
# "max_stock": max(quantities) if quantities else 0
|
||
# })
|
||
|
||
# # Toplam stoka göre sırala
|
||
# category_stock_sorted = sorted(category_stock_analysis, key=lambda x: x["total_stock"], reverse=True)
|
||
|
||
# # Stok dağılımı (binning)
|
||
# stock_distribution = {
|
||
# "0-100": 0,
|
||
# "101-500": 0,
|
||
# "501-1000": 0,
|
||
# "1001-5000": 0,
|
||
# "5000+": 0
|
||
# }
|
||
|
||
# for qty in stock_quantities:
|
||
# if qty <= 100:
|
||
# stock_distribution["0-100"] += 1
|
||
# elif qty <= 500:
|
||
# stock_distribution["101-500"] += 1
|
||
# elif qty <= 1000:
|
||
# stock_distribution["501-1000"] += 1
|
||
# elif qty <= 5000:
|
||
# stock_distribution["1001-5000"] += 1
|
||
# else:
|
||
# stock_distribution["5000+"] += 1
|
||
|
||
# Basitleştirilmiş ürün listesi (sadece fiyat analizi için)
|
||
# Full products data for Overview tab
|
||
full_products = []
|
||
|
||
for product in all_products:
|
||
price = product.get("price", {}).get("sellingPrice")
|
||
category = product.get("categoryName") or product.get("category")
|
||
brand = product.get("brand", {}).get("name") or product.get("brandName") or "Bilinmeyen"
|
||
|
||
# Extract category name if it's a dict
|
||
if isinstance(category, dict):
|
||
category_name = category.get("name", "")
|
||
else:
|
||
category_name = category if category else ""
|
||
|
||
# Social proof data (orders, views, baskets, favorites, etc.) - socialProofs is an array
|
||
social_proofs = product.get("socialProofs", [])
|
||
orders = 0
|
||
page_views = 0
|
||
baskets = 0
|
||
favorites = 0
|
||
|
||
if isinstance(social_proofs, list):
|
||
for proof in social_proofs:
|
||
proof_type = proof.get("type", "")
|
||
value_str = proof.get("value", "0")
|
||
|
||
# Parse value (can be string like "208" or "1k")
|
||
try:
|
||
if "k" in value_str.lower():
|
||
parsed_value = int(float(value_str.lower().replace("k", "")) * 1000)
|
||
else:
|
||
parsed_value = int(value_str)
|
||
except:
|
||
parsed_value = 0
|
||
|
||
# Assign to appropriate field
|
||
if proof_type == "orderCountL3D":
|
||
orders = parsed_value
|
||
elif proof_type == "pageViewCount":
|
||
page_views = parsed_value
|
||
elif proof_type == "basketCount":
|
||
baskets = parsed_value
|
||
elif proof_type == "favoriteCount":
|
||
favorites = parsed_value
|
||
|
||
# Product image and URL
|
||
images = product.get("images", [])
|
||
image_url = images[0] if isinstance(images, list) and len(images) > 0 else ""
|
||
|
||
# Trendyol URL
|
||
product_url = product.get("url", "")
|
||
if not product_url:
|
||
content_id = product.get("contentId") or product.get("id")
|
||
if content_id:
|
||
product_url = f"https://www.trendyol.com/p/{content_id}"
|
||
|
||
# Extract barcode from winnerVariant
|
||
barcode = ""
|
||
winner_variant = product.get("winnerVariant", {})
|
||
if isinstance(winner_variant, dict):
|
||
barcode = winner_variant.get("barcode", "")
|
||
|
||
# Extract country (origin) from merchantListings
|
||
country_code = ""
|
||
country_name = "Bilinmeyen" # Default value for products without origin data
|
||
merchant_listings_temp = product.get("merchantListings", [])
|
||
if merchant_listings_temp and len(merchant_listings_temp) > 0:
|
||
custom_values = merchant_listings_temp[0].get("customValues", [])
|
||
for cv in custom_values:
|
||
if cv.get("key") == "origin":
|
||
country_code = cv.get("value", "").upper()
|
||
country_name = COUNTRY_NAMES.get(country_code, f"Diğer ({country_code})" if country_code else "Bilinmeyen")
|
||
break
|
||
|
||
# Extract review count
|
||
review_count = 0
|
||
try:
|
||
review_count = int(product.get("rating_count", 0) or 0)
|
||
except:
|
||
try:
|
||
rating_obj = product.get("rating", {})
|
||
if isinstance(rating_obj, dict):
|
||
review_count = int(rating_obj.get("totalComments", 0) or rating_obj.get("totalCount", 0) or 0)
|
||
except:
|
||
review_count = 0
|
||
|
||
# Extract rating score
|
||
rating_score = 0.0
|
||
try:
|
||
rating_obj = product.get("rating", {})
|
||
if isinstance(rating_obj, dict):
|
||
rating_score = float(rating_obj.get("averageRating", 0) or rating_obj.get("score", 0) or 0)
|
||
except:
|
||
rating_score = 0.0
|
||
|
||
if price and category_name:
|
||
full_products.append({
|
||
"id": product.get("contentId") or product.get("id"),
|
||
"name": product.get("name", ""),
|
||
"brand": brand,
|
||
"price": price,
|
||
"category_name": category_name,
|
||
"orders": orders,
|
||
"page_views": page_views,
|
||
"baskets": baskets, # Basket/cart additions
|
||
"favorites": favorites, # Wishlist/favorites count
|
||
"review_count": review_count, # Review/comment count
|
||
"rating": rating_score, # Average rating score (0-5)
|
||
"image_url": image_url if image_url else "https://via.placeholder.com/150",
|
||
"url": product_url,
|
||
"barcode": barcode, # Barcode field added for barcode analysis
|
||
"country_code": country_code, # Country code (TR, CN, DE, etc.)
|
||
"country": country_name # Country name (Türkiye, Çin, Almanya, etc.)
|
||
})
|
||
|
||
result = {
|
||
"report_id": report_id,
|
||
"report_name": report.name,
|
||
"kpis": kpis,
|
||
"all_products": full_products, # Full product data with social proof, images, URLs
|
||
"charts": {
|
||
"price_distribution": price_ranges,
|
||
"top_categories": top_categories,
|
||
"top_brands": top_brands,
|
||
# DISABLED: "stock_status": stock_status, # Not needed per user request
|
||
"rating_distribution": rating_distribution,
|
||
"brand_price_boxplot": brand_price_stats,
|
||
"price_rating_scatter": scatter_data,
|
||
"brand_strength": brand_strength_scores,
|
||
"brand_category_heatmap": heatmap_data,
|
||
"category_price_premium": {
|
||
"all_categories": category_price_analysis_sorted,
|
||
"most_expensive": most_expensive_categories,
|
||
"most_affordable": most_affordable_categories
|
||
},
|
||
"origin_analysis": {
|
||
"countries": origin_country_data_sorted,
|
||
"top_countries": origin_country_data_sorted[:10],
|
||
"total_products_with_origin": products_with_origin,
|
||
"coverage_percentage": round((products_with_origin / total_products * 100), 2) if total_products > 0 else 0
|
||
},
|
||
"barcode_analysis": {
|
||
"prefixes": barcode_prefix_data_sorted,
|
||
"countries_from_barcode": barcode_country_data_sorted,
|
||
"top_countries_from_barcode": barcode_country_data_sorted[:10],
|
||
"total_products_with_barcode": products_with_barcode,
|
||
"coverage_percentage": round((products_with_barcode / total_products * 100), 2) if total_products > 0 else 0
|
||
},
|
||
"merchant_analysis": {
|
||
"merchants": merchant_list_sorted,
|
||
"top_merchants": top_merchants,
|
||
"total_merchants": total_merchants,
|
||
"total_products_with_merchant": products_with_merchant,
|
||
"total_winners": total_winners,
|
||
"winner_percentage": round(winner_percentage, 2),
|
||
"coverage_percentage": round((products_with_merchant / total_products * 100), 2) if total_products > 0 else 0
|
||
}
|
||
# DISABLED: Stock quantity analysis (not needed per user request)
|
||
# "stock_analysis": {
|
||
# "avg_stock": round(avg_stock, 2),
|
||
# "median_stock": round(median_stock, 2),
|
||
# "total_stock": total_stock,
|
||
# "min_stock": min_stock,
|
||
# "max_stock": max_stock,
|
||
# "products_with_stock_info": products_with_stock_info,
|
||
# "coverage_percentage": round((products_with_stock_info / total_products * 100), 2) if total_products > 0 else 0,
|
||
# "distribution": stock_distribution,
|
||
# "category_stocks": category_stock_sorted,
|
||
# "top_stocked_categories": category_stock_sorted[:10]
|
||
# }
|
||
},
|
||
"insights": {
|
||
"low_rating_products": low_rating_products,
|
||
"anomalies": anomalies
|
||
}
|
||
}
|
||
|
||
# Cache the result for 1 hour
|
||
dashboard_cache[cache_key] = (result, time.time())
|
||
print(f"📊 Cached dashboard data for report {report_id}")
|
||
|
||
return result
|
||
|
||
|
||
# ============================================================================
|
||
# NEW: External API aggregations (Reviews, Social Proof, Q&A, Similar, Followers)
|
||
# ============================================================================
|
||
|
||
class AggregationParams(BaseModel):
|
||
refresh: Optional[bool] = False
|
||
per_product_limit: Optional[int] = None # None = API default
|
||
batch_size: Optional[int] = 50
|
||
|
||
|
||
# DISABLED: Reviews feature removed per user request
|
||
# @app.get("/api/reports/{report_id}/reviews-summary")
|
||
def reviews_summary_disabled(report_id: int, refresh: bool = False, db: Session = Depends(get_db)):
|
||
return {"error": "Reviews feature is disabled"}
|
||
# Try persistent cache first
|
||
if not refresh:
|
||
persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/reviews.json")
|
||
if persisted:
|
||
return {"source": "file", **persisted}
|
||
cache_key = f"{report_id}"
|
||
if not refresh and cache_key in reviews_cache:
|
||
return {"source": "cache", **reviews_cache[cache_key]}
|
||
|
||
try:
|
||
all_products, _ = load_report_products(db, report_id)
|
||
product_ids = [p.get("id") for p in all_products if p.get("id")]
|
||
total = len(product_ids)
|
||
|
||
summary = {
|
||
"total_products": total,
|
||
"processed": 0,
|
||
"total_reviews": 0,
|
||
"rating_histogram": {"5": 0, "4": 0, "3": 0, "2": 0, "1": 0},
|
||
"tags": {},
|
||
"sample_reviews": []
|
||
}
|
||
|
||
for pid in product_ids:
|
||
data = fetch_product_reviews(pid, page=0, page_size=5)
|
||
if not data:
|
||
_log_enrichment_error(db, report_id=report_id, product_id=int(pid), merchant_id=None,
|
||
endpoint="reviews", error_type="request_failed",
|
||
message="reviews request failed or timed out")
|
||
continue
|
||
summary["processed"] += 1
|
||
result = data.get("result", {}) if isinstance(data, dict) else {}
|
||
summary_data = result.get("summary", {})
|
||
|
||
# rating histogram - API'de "ratingCounts" olarak geliyor
|
||
rating_counts = summary_data.get("ratingCounts", []) or summary_data.get("ratingHistogram", [])
|
||
for h in rating_counts:
|
||
k = str(h.get("rate", "") or h.get("rating", ""))
|
||
v = h.get("count", 0)
|
||
if k in summary["rating_histogram"]:
|
||
summary["rating_histogram"][k] += v
|
||
|
||
# total reviews
|
||
total_review_count = summary_data.get("totalCommentCount") or summary_data.get("totalCount") or 0
|
||
summary["total_reviews"] += total_review_count
|
||
|
||
# tags - API'de "tags" olarak geliyor (tagCloud değil)
|
||
tags = summary_data.get("tags", []) or summary_data.get("tagCloud", [])
|
||
for tag in tags:
|
||
t = tag.get("name") or tag.get("tag")
|
||
c = tag.get("count", 0)
|
||
if t:
|
||
summary["tags"][t] = summary["tags"].get(t, 0) + c
|
||
|
||
# samples - API'de "reviews" olarak geliyor (content değil)
|
||
reviews = result.get("reviews", []) or result.get("content", [])
|
||
for r in reviews[:2]:
|
||
summary["sample_reviews"].append({
|
||
"product_id": pid,
|
||
"rating": r.get("rate") or r.get("rating"),
|
||
"helpful": r.get("likeCount", 0),
|
||
"with_image": bool(r.get("imageUrls") or r.get("hasImage")),
|
||
"text": (r.get("comment") or "")[:200]
|
||
})
|
||
# small pacing, limiter already active
|
||
time.sleep(0.01)
|
||
|
||
# top tags
|
||
summary["top_tags"] = sorted(
|
||
[{"tag": t, "count": c} for t, c in summary["tags"].items()],
|
||
key=lambda x: x["count"], reverse=True
|
||
)[:30]
|
||
|
||
payload = {"source": "live", "summary": summary}
|
||
reviews_cache[cache_key] = payload
|
||
return payload
|
||
except Exception as e:
|
||
return {"error": str(e), "note": "Network or parsing issue", "summary": {}}
|
||
|
||
|
||
@app.get("/api/reports/{report_id}/social-proof/progress")
|
||
def social_proof_progress(report_id: int):
|
||
"""Check social proof enrichment progress"""
|
||
progress_key = f"social_{report_id}"
|
||
if progress_key in enrichment_progress:
|
||
return enrichment_progress.get(progress_key)
|
||
return {"status": "not_started", "progress": 0}
|
||
|
||
|
||
@app.get("/api/reports/{report_id}/social-proof")
|
||
def social_proof(report_id: int, refresh: bool = False, batch_size: int = 5, db: Session = Depends(get_db)):
|
||
# Try persistent cache first
|
||
if not refresh:
|
||
persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
|
||
if persisted:
|
||
pass
|
||
# Transform cache data to frontend format
|
||
agg = {
|
||
"products": persisted.get("products", 0),
|
||
"total": persisted.get("total", {}),
|
||
"missing": persisted.get("missing", 0)
|
||
}
|
||
details = persisted.get("details", {})
|
||
|
||
# Build ALL products list (not just top 20)
|
||
details_list = [
|
||
{"product_id": int(pid), **vals}
|
||
for pid, vals in details.items()
|
||
]
|
||
# Sort by page_views (descending) - ALL products
|
||
all_by_views = sorted(details_list, key=lambda x: x.get("page_views", 0), reverse=True)
|
||
# Sort by orders (descending) - ALL products
|
||
all_by_orders = sorted(details_list, key=lambda x: x.get("orders", 0), reverse=True)
|
||
|
||
return {
|
||
"source": "file",
|
||
"aggregation": agg,
|
||
"details": details,
|
||
"top_by_views": all_by_views, # All products sorted by views
|
||
"top_by_orders": all_by_orders # All products sorted by orders
|
||
}
|
||
cache_key = f"{report_id}:b{batch_size}"
|
||
if not refresh and cache_key in social_proof_cache:
|
||
return {"source": "cache", **social_proof_cache[cache_key]}
|
||
|
||
# Initialize progress tracking
|
||
progress_key = f"social_{report_id}"
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "in_progress",
|
||
"progress": 0,
|
||
"total": 0,
|
||
"processed": 0,
|
||
"message": "Sosyal kanıt verileri toplanıyor..."
|
||
})
|
||
|
||
try:
|
||
all_products, _ = load_report_products(db, report_id)
|
||
product_ids = [p.get("id") for p in all_products if p.get("id")]
|
||
total_products = len(product_ids)
|
||
|
||
# Update total count
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "in_progress",
|
||
"progress": 0,
|
||
"total": total_products,
|
||
"processed": 0,
|
||
"message": f"0 / {total_products} ürün işlendi"
|
||
})
|
||
# Map product id to a short display name
|
||
product_info = {}
|
||
for p in all_products:
|
||
pid = p.get("id")
|
||
if not pid:
|
||
continue
|
||
name = (p.get("name") or "Unknown")
|
||
brand = p.get("brand", {}).get("name")
|
||
if brand:
|
||
display = f"{brand} - {name}"[:80]
|
||
else:
|
||
display = name[:80]
|
||
product_info[int(pid)] = {"name": display}
|
||
agg = {
|
||
"products": len(product_ids),
|
||
"total": {"page_views": 0, "orders": 0, "baskets": 0, "favorites": 0},
|
||
"missing": 0
|
||
}
|
||
details = {}
|
||
processed = 0
|
||
|
||
for chunk in _chunked(product_ids, max(1, batch_size)):
|
||
data = fetch_social_proof(chunk)
|
||
if not data:
|
||
agg["missing"] += len(chunk)
|
||
# log per chunk
|
||
_log_enrichment_error(db, report_id=report_id, product_id=None, merchant_id=None,
|
||
endpoint="social", error_type="request_failed",
|
||
message=f"social-proof batch failed for {len(chunk)} items")
|
||
time.sleep(0.02)
|
||
processed += len(chunk)
|
||
# Update progress
|
||
progress_pct = int((processed / total_products) * 100)
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "in_progress",
|
||
"progress": progress_pct,
|
||
"total": total_products,
|
||
"processed": processed,
|
||
"message": f"{processed} / {total_products} ürün işlendi"
|
||
})
|
||
continue
|
||
|
||
items = data.get("result") or data.get("items") or []
|
||
for it in items:
|
||
pid = it.get("contentId") or it.get("id")
|
||
pv = _safe_int(it.get("pageViewCount") or it.get("page-view-count") or 0)
|
||
od = _safe_int(it.get("orderCount") or it.get("order-count") or 0)
|
||
bk = _safe_int(it.get("basketCount") or it.get("basket-count") or 0)
|
||
fv = _safe_int(it.get("favoriteCount") or it.get("favorite-count") or 0)
|
||
if pid is None:
|
||
continue
|
||
details[int(pid)] = {"page_views": pv, "orders": od, "baskets": bk, "favorites": fv}
|
||
agg["total"]["page_views"] += pv
|
||
agg["total"]["orders"] += od
|
||
agg["total"]["baskets"] += bk
|
||
agg["total"]["favorites"] += fv
|
||
|
||
processed += len(chunk)
|
||
# Update progress after each chunk
|
||
progress_pct = int((processed / total_products) * 100)
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "in_progress",
|
||
"progress": progress_pct,
|
||
"total": total_products,
|
||
"processed": processed,
|
||
"message": f"{processed} / {total_products} ürün işlendi"
|
||
})
|
||
time.sleep(0.01)
|
||
|
||
# Build top lists
|
||
details_list = [
|
||
{"product_id": pid, **vals, **(product_info.get(pid, {}))}
|
||
for pid, vals in details.items()
|
||
]
|
||
top_by_views = sorted(details_list, key=lambda x: x.get("page_views", 0), reverse=True)[:20]
|
||
top_by_orders = sorted(details_list, key=lambda x: x.get("orders", 0), reverse=True)[:20]
|
||
|
||
result = {"aggregation": agg, "details": details, "top_by_views": top_by_views, "top_by_orders": top_by_orders}
|
||
social_proof_cache[cache_key] = result
|
||
|
||
# Mark as completed
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "completed",
|
||
"progress": 100,
|
||
"total": total_products,
|
||
"processed": total_products,
|
||
"message": f"Tamamlandı! {total_products} ürün işlendi"
|
||
})
|
||
|
||
return result
|
||
except Exception as e:
|
||
pass
|
||
# Mark as failed
|
||
enrichment_progress.set(progress_key, {
|
||
"status": "failed",
|
||
"progress": 0,
|
||
"total": 0,
|
||
"processed": 0,
|
||
"message": f"Hata: {str(e)}"
|
||
})
|
||
return {"error": str(e), "note": "Network or parsing issue", "aggregation": {}, "details": {}}
|
||
|
||
|
||
@app.get("/api/reports/{report_id}/sales-analytics")
|
||
def sales_analytics(report_id: int):
|
||
"""
|
||
Fast sales analytics endpoint - returns top products by orders
|
||
"""
|
||
try:
|
||
# Load social proof data
|
||
social_data = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
|
||
if not social_data:
|
||
return {"error": "Social proof data not found"}
|
||
|
||
social_details = social_data.get("details", {})
|
||
|
||
# Build enriched product list from social proof data
|
||
enriched_products = []
|
||
for pid_str, social_vals in social_details.items():
|
||
# Social data already has name, brand, price
|
||
enriched = {
|
||
"id": int(pid_str),
|
||
"name": social_vals.get("name", ""),
|
||
"brand": social_vals.get("brand", "Unknown"),
|
||
"price": social_vals.get("price", 0),
|
||
"page_views": social_vals.get("page_views", 0),
|
||
"orders": social_vals.get("orders", 0),
|
||
"baskets": social_vals.get("baskets", 0),
|
||
"favorites": social_vals.get("favorites", 0)
|
||
}
|
||
|
||
# Calculate conversion rates
|
||
views = enriched["page_views"]
|
||
baskets = enriched["baskets"]
|
||
orders = enriched["orders"]
|
||
|
||
enriched["view_to_basket_rate"] = (baskets / views * 100) if views > 0 else 0
|
||
enriched["basket_to_order_rate"] = (orders / baskets * 100) if baskets > 0 else 0
|
||
enriched["view_to_order_rate"] = (orders / views * 100) if views > 0 else 0
|
||
|
||
# Calculate performance score (0-100)
|
||
# Weighted: orders (40%), conversion rate (30%), views (20%), favorites (10%)
|
||
max_orders = max([s.get("orders", 0) for s in social_details.values()]) or 1
|
||
max_views = max([s.get("page_views", 0) for s in social_details.values()]) or 1
|
||
max_favs = max([s.get("favorites", 0) for s in social_details.values()]) or 1
|
||
|
||
order_score = (orders / max_orders) * 40
|
||
conversion_score = enriched["view_to_order_rate"] * 3 # Scale 0-30
|
||
view_score = (views / max_views) * 20
|
||
fav_score = (enriched["favorites"] / max_favs) * 10
|
||
|
||
enriched["performance_score"] = min(100, order_score + conversion_score + view_score + fav_score)
|
||
|
||
enriched_products.append(enriched)
|
||
|
||
# Return top products by orders
|
||
top_by_orders = sorted(enriched_products, key=lambda x: x.get("orders", 0), reverse=True)[:20]
|
||
|
||
return {
|
||
"top_products_by_orders": top_by_orders
|
||
}
|
||
|
||
except Exception as e:
|
||
return {"error": str(e), "note": "Failed to generate sales analytics"}
|
||
|
||
|
||
# ============================================================================
|
||
# KEYWORD ANALYSIS
|
||
# ============================================================================
|
||
|
||
# Türkçe stop words listesi
|
||
TURKISH_STOP_WORDS = {
|
||
've', 'ile', 'için', 'bir', 'bu', 'şu', 'o', 'de', 'da', 'ki',
|
||
'mi', 'mı', 'mu', 'mü', 'ise', 'olan', 'olarak', 'gibi',
|
||
'kadar', 'daha', 'en', 'çok', 'az', 'var', 'yok', 'veya',
|
||
'ile', 'ancak', 'ama', 'fakat', 'lakin', 'çünkü', 'zira',
|
||
'şöyle', 'böyle', 'nasıl', 'ne', 'neden', 'niçin', 'niye',
|
||
'a', 'e', 'i', 'ı', 'o', 'ö', 'u', 'ü' # Tek harfler
|
||
}
|
||
|
||
def normalize_turkish(text: str) -> str:
|
||
"""Türkçe karakterleri normalize eder"""
|
||
replacements = {
|
||
'İ': 'i', 'I': 'ı',
|
||
'Ş': 'ş', 'ş': 'ş',
|
||
'Ğ': 'ğ', 'ğ': 'ğ',
|
||
'Ü': 'ü', 'ü': 'ü',
|
||
'Ö': 'ö', 'ö': 'ö',
|
||
'Ç': 'ç', 'ç': 'ç'
|
||
}
|
||
for old, new in replacements.items():
|
||
text = text.replace(old, new)
|
||
return text
|
||
|
||
def extract_keywords_from_text(text: str, min_length: int = 3, stop_words: set = None) -> list:
|
||
"""
|
||
Metinden keyword'leri çıkarır.
|
||
|
||
Args:
|
||
text: Analiz edilecek metin
|
||
min_length: Minimum keyword uzunluğu
|
||
stop_words: Stop words seti (None ise TURKISH_STOP_WORDS kullanılır)
|
||
|
||
Returns:
|
||
Keyword listesi
|
||
"""
|
||
if not text or not isinstance(text, str):
|
||
return []
|
||
|
||
if stop_words is None:
|
||
stop_words = TURKISH_STOP_WORDS
|
||
|
||
# 1. Küçük harfe çevir
|
||
text = text.lower()
|
||
|
||
# 2. Türkçe karakterleri normalize et
|
||
text = normalize_turkish(text)
|
||
|
||
# 3. Noktalama işaretlerini temizle
|
||
text = re.sub(r'[^\w\s]', ' ', text)
|
||
|
||
# 4. Sayıları temizle
|
||
text = re.sub(r'\d+', '', text)
|
||
|
||
# 5. Fazla boşlukları temizle
|
||
text = re.sub(r'\s+', ' ', text).strip()
|
||
|
||
# 6. Tokenize
|
||
tokens = text.split()
|
||
|
||
# 7. Filtrele
|
||
keywords = [
|
||
token for token in tokens
|
||
if len(token) >= min_length
|
||
and token not in stop_words
|
||
and not token.isdigit()
|
||
and token.isalpha() # Sadece harf içeren
|
||
]
|
||
|
||
return keywords
|
||
|
||
def generate_ngrams(tokens: list, min_n: int = 1, max_n: int = 3) -> list:
|
||
"""
|
||
Generate N-grams from a list of tokens.
|
||
|
||
Args:
|
||
tokens: List of words (tokens)
|
||
min_n: Minimum N-gram size (default: 1 = single words)
|
||
max_n: Maximum N-gram size (default: 3 = trigrams)
|
||
|
||
Returns:
|
||
List of N-grams as strings
|
||
|
||
Example:
|
||
tokens = ["mini", "buzdolabı", "beyaz"]
|
||
min_n=1, max_n=3 →
|
||
["mini", "buzdolabı", "beyaz", # 1-grams
|
||
"mini buzdolabı", "buzdolabı beyaz", # 2-grams
|
||
"mini buzdolabı beyaz"] # 3-grams
|
||
"""
|
||
ngrams = []
|
||
|
||
for n in range(min_n, max_n + 1):
|
||
# Generate all n-grams of size n
|
||
for i in range(len(tokens) - n + 1):
|
||
ngram = ' '.join(tokens[i:i+n])
|
||
ngrams.append(ngram)
|
||
|
||
return ngrams
|
||
|
||
@app.get("/api/reports/{report_id}/keyword-analysis")
|
||
def keyword_analysis(
|
||
report_id: int,
|
||
min_frequency: int = 3,
|
||
min_length: int = 3,
|
||
category_filter: Optional[str] = None,
|
||
top_n: int = 50, # DEPRECATED: Use page/per_page instead
|
||
# Pagination parameters
|
||
page: int = 1, # Page number (1-indexed)
|
||
per_page: int = 100, # Keywords per page
|
||
# N-gram (word count) filters
|
||
min_word_count: Optional[int] = 1, # Minimum number of words in keyword (1-5)
|
||
max_word_count: Optional[int] = 3, # Maximum number of words in keyword (1-5)
|
||
# Advanced filters
|
||
min_views: Optional[int] = None,
|
||
max_views: Optional[int] = None,
|
||
min_orders: Optional[int] = None,
|
||
max_orders: Optional[int] = None,
|
||
min_reviews: Optional[int] = None,
|
||
max_reviews: Optional[int] = None,
|
||
min_conversion_rate: Optional[float] = None,
|
||
max_conversion_rate: Optional[float] = None,
|
||
min_avg_price: Optional[float] = None,
|
||
max_avg_price: Optional[float] = None,
|
||
sort_by: str = "frequency", # frequency, views, orders, reviews, conversion_rate
|
||
sort_order: str = "desc", # asc, desc
|
||
competition_level: Optional[str] = None, # low, medium, high
|
||
min_potential_score: Optional[float] = None, # 0-100
|
||
db: Session = Depends(get_db)
|
||
):
|
||
"""
|
||
Ürün adlarından keyword çıkarır ve N-gram analizi yapar.
|
||
|
||
Args:
|
||
report_id: Rapor ID
|
||
min_frequency: Minimum keyword geçme sayısı (default: 3)
|
||
min_length: Minimum karakter uzunluğu (default: 3)
|
||
min_word_count: Minimum kelime sayısı (1-5, default: 1)
|
||
max_word_count: Maximum kelime sayısı (1-5, default: 3)
|
||
category_filter: Belirli kategoriye göre filtreleme (opsiyonel)
|
||
top_n: Top N keyword göster (default: 50)
|
||
|
||
N-gram Examples:
|
||
min_word_count=1, max_word_count=1 → "mini", "buzdolabı", "beyaz"
|
||
min_word_count=2, max_word_count=2 → "mini buzdolabı", "buzdolabı beyaz"
|
||
min_word_count=2, max_word_count=3 → "mini buzdolabı", "beyaz buzdolabı", "mini buzdolabı modelleri"
|
||
|
||
Returns:
|
||
Keyword analiz sonuçları
|
||
"""
|
||
# print(f"🔍 ========== KEYWORD ANALYSIS REQUEST ==========")
|
||
print(f"📋 Report ID: {report_id}")
|
||
print(f"⚙️ Parameters: min_frequency={min_frequency}, min_length={min_length}, word_count={min_word_count}-{max_word_count}, top_n={top_n}, category_filter={category_filter}")
|
||
|
||
try:
|
||
print(f"📦 Ürünler yükleniyor...")
|
||
# Load products
|
||
all_products, categories_data = load_report_products(db, report_id)
|
||
print(f"✅ {len(all_products) if all_products else 0} ürün yüklendi")
|
||
|
||
if not all_products:
|
||
print(f"⚠️ Rapor için ürün bulunamadı!")
|
||
return {"error": "No products found for this report"}
|
||
|
||
# Load social proof data
|
||
print(f"📊 Social proof data yükleniyor...")
|
||
social_json_path = os.path.join(REPORTS_DIR, f"enrich_{report_id}", "social.json")
|
||
social_data = _load_json(social_json_path)
|
||
social_details = social_data.get("details", {}) if social_data else {}
|
||
print(f"✅ Social proof data yüklendi: {len(social_details)} ürün (path: {social_json_path})")
|
||
|
||
# Filter by category if specified
|
||
if category_filter:
|
||
all_products = [
|
||
p for p in all_products
|
||
if (p.get("category", {}).get("name") if isinstance(p.get("category"), dict) else p.get("category")) == category_filter
|
||
]
|
||
|
||
# Step 1: Extract keywords from all product names (OPTIMIZED)
|
||
print(f"🔤 Keyword extraction başlatılıyor... ({len(all_products)} ürün)")
|
||
keyword_to_products = {} # {keyword: [product_ids]}
|
||
product_keywords_map = {} # {product_id: [keywords]}
|
||
|
||
# Pre-compile regex patterns for better performance
|
||
import re
|
||
stop_words = TURKISH_STOP_WORDS
|
||
|
||
processed_count = 0
|
||
start_time = time.time()
|
||
|
||
for product in all_products:
|
||
product_id = product.get("id")
|
||
if not product_id:
|
||
continue
|
||
|
||
product_name = product.get("name", "")
|
||
if not product_name:
|
||
continue
|
||
|
||
# Extract keywords (optimized inline version)
|
||
text = product_name.lower()
|
||
text = normalize_turkish(text)
|
||
text = re.sub(r'[^\w\s]', ' ', text)
|
||
text = re.sub(r'\d+', '', text)
|
||
text = re.sub(r'\s+', ' ', text).strip()
|
||
|
||
tokens = text.split()
|
||
# Filter tokens by character length and stop words
|
||
filtered_tokens = [
|
||
token for token in tokens
|
||
if len(token) >= min_length
|
||
and token not in stop_words
|
||
and not token.isdigit()
|
||
and token.isalpha()
|
||
]
|
||
|
||
# Generate N-grams (1-word, 2-word, 3-word, etc.)
|
||
keywords = generate_ngrams(filtered_tokens, min_n=min_word_count, max_n=max_word_count)
|
||
|
||
product_keywords_map[product_id] = keywords
|
||
|
||
# Group by keyword
|
||
for keyword in keywords:
|
||
if keyword not in keyword_to_products:
|
||
keyword_to_products[keyword] = []
|
||
keyword_to_products[keyword].append(product_id)
|
||
|
||
processed_count += 1
|
||
if processed_count % 500 == 0:
|
||
elapsed = time.time() - start_time
|
||
rate = processed_count / elapsed if elapsed > 0 else 0
|
||
remaining = (len(all_products) - processed_count) / rate if rate > 0 else 0
|
||
print(f"⏳ İşlenen ürün: {processed_count}/{len(all_products)} ({rate:.0f} ürün/sn, ~{remaining:.0f}s kaldı)")
|
||
|
||
elapsed_total = time.time() - start_time
|
||
print(f"✅ Keyword extraction tamamlandı: {len(keyword_to_products)} unique keyword bulundu ({elapsed_total:.2f}s)")
|
||
|
||
# Step 2: Separate rare keywords (frequency 1-2) and common keywords (>= min_frequency)
|
||
print(f"🔍 Keyword ayrıştırma: rare (1-2) vs common (>={min_frequency})")
|
||
rare_keywords = {
|
||
kw: product_ids
|
||
for kw, product_ids in keyword_to_products.items()
|
||
if 1 <= len(product_ids) <= 2
|
||
}
|
||
filtered_keywords = {
|
||
kw: product_ids
|
||
for kw, product_ids in keyword_to_products.items()
|
||
if len(product_ids) >= min_frequency
|
||
}
|
||
print(f"✅ Rare keywords: {len(rare_keywords)} | Common keywords: {len(filtered_keywords)}")
|
||
|
||
# Step 3: Calculate metrics for each keyword (OPTIMIZED)
|
||
print(f"📊 Metrikler hesaplanıyor... ({len(filtered_keywords)} keyword)")
|
||
keyword_metrics = []
|
||
|
||
# Create product lookup dict for faster access
|
||
product_dict = {p.get("id"): p for p in all_products if p.get("id")}
|
||
|
||
metric_count = 0
|
||
metric_start_time = time.time()
|
||
for keyword, product_ids in filtered_keywords.items():
|
||
# Get category distribution and metrics
|
||
category_counts = {}
|
||
category_metrics = {} # {category: {views, orders, reviews, baskets, favorites, product_count}}
|
||
total_views = 0
|
||
total_orders = 0
|
||
total_baskets = 0
|
||
total_favorites = 0
|
||
total_reviews = 0
|
||
keyword_products = []
|
||
|
||
for pid in product_ids:
|
||
# Find product (using dict lookup for O(1) instead of O(n))
|
||
product = product_dict.get(pid)
|
||
if not product:
|
||
continue
|
||
|
||
# Get category
|
||
category = product.get("category", {})
|
||
if isinstance(category, dict):
|
||
category_name = category.get("name", "Uncategorized")
|
||
else:
|
||
category_name = category if category else "Uncategorized"
|
||
|
||
category_counts[category_name] = category_counts.get(category_name, 0) + 1
|
||
|
||
# Initialize category metrics if not exists
|
||
if category_name not in category_metrics:
|
||
category_metrics[category_name] = {
|
||
"views": 0,
|
||
"orders": 0,
|
||
"reviews": 0,
|
||
"baskets": 0,
|
||
"favorites": 0,
|
||
"product_count": 0
|
||
}
|
||
|
||
# Get social proof
|
||
social = social_details.get(str(pid), {})
|
||
views = social.get("page_views", 0) or 0
|
||
orders = social.get("orders", 0) or 0
|
||
baskets = social.get("baskets", 0) or 0
|
||
favorites = social.get("favorites", 0) or 0
|
||
|
||
# Get review count from multiple possible locations
|
||
review_count = 0
|
||
# Try rating_count field first
|
||
if product.get("rating_count"):
|
||
review_count = int(product.get("rating_count", 0) or 0)
|
||
# Try rating object with totalCount or totalComments
|
||
elif product.get("rating"):
|
||
rating_obj = product.get("rating")
|
||
if isinstance(rating_obj, dict):
|
||
review_count = int(rating_obj.get("totalComments", 0) or rating_obj.get("totalCount", 0) or 0)
|
||
elif isinstance(rating_obj, (int, float)):
|
||
pass
|
||
# If rating is just a number, no review count available
|
||
review_count = 0
|
||
|
||
# Add to category metrics
|
||
category_metrics[category_name]["views"] += views
|
||
category_metrics[category_name]["orders"] += orders
|
||
category_metrics[category_name]["reviews"] += review_count
|
||
category_metrics[category_name]["baskets"] += baskets
|
||
category_metrics[category_name]["favorites"] += favorites
|
||
category_metrics[category_name]["product_count"] += 1
|
||
|
||
# Add to totals
|
||
total_views += views
|
||
total_orders += orders
|
||
total_baskets += baskets
|
||
total_favorites += favorites
|
||
total_reviews += review_count
|
||
|
||
# Store product info
|
||
keyword_products.append({
|
||
"id": pid,
|
||
"name": product.get("name", "")[:50],
|
||
"category": category_name,
|
||
"brand": product.get("brand", {}).get("name", "Unknown") if isinstance(product.get("brand"), dict) else "Unknown",
|
||
"views": views,
|
||
"orders": orders,
|
||
"reviews": review_count,
|
||
"price": product.get("price", {}).get("sellingPrice", 0) if isinstance(product.get("price"), dict) else 0,
|
||
"image_url": product.get("images", [])[0] if product.get("images") else "https://via.placeholder.com/150",
|
||
"url": product.get("url", "") or f"https://www.trendyol.com/p/{pid}"
|
||
})
|
||
|
||
product_count = len(product_ids)
|
||
|
||
# Calculate averages
|
||
avg_views = total_views / product_count if product_count > 0 else 0
|
||
avg_orders = total_orders / product_count if product_count > 0 else 0
|
||
avg_reviews = total_reviews / product_count if product_count > 0 else 0
|
||
conversion_rate = (total_orders / total_views * 100) if total_views > 0 else 0
|
||
|
||
# Top categories with metrics
|
||
top_categories = []
|
||
for cat, count in category_counts.items():
|
||
cat_metrics = category_metrics.get(cat, {})
|
||
cat_views = cat_metrics.get("views", 0)
|
||
cat_orders = cat_metrics.get("orders", 0)
|
||
cat_reviews = cat_metrics.get("reviews", 0)
|
||
cat_conversion = (cat_orders / cat_views * 100) if cat_views > 0 else 0
|
||
|
||
top_categories.append({
|
||
"category": cat,
|
||
"count": count,
|
||
"percentage": (count / product_count * 100),
|
||
"total_views": cat_views,
|
||
"total_orders": cat_orders,
|
||
"total_reviews": cat_reviews,
|
||
"conversion_rate": round(cat_conversion, 2)
|
||
})
|
||
|
||
top_categories = sorted(
|
||
top_categories,
|
||
key=lambda x: x["count"],
|
||
reverse=True
|
||
)[:10] # Show top 10 categories
|
||
|
||
# Filter products with sales > 0, then sort by orders and take top 10
|
||
products_with_sales = [p for p in keyword_products if p["orders"] > 0]
|
||
top_products = sorted(
|
||
products_with_sales,
|
||
key=lambda x: x["orders"],
|
||
reverse=True
|
||
)[:10]
|
||
|
||
keyword_metrics.append({
|
||
"keyword": keyword,
|
||
"frequency": product_count,
|
||
"categories": category_counts,
|
||
"top_categories": top_categories,
|
||
"performance": {
|
||
"total_views": total_views,
|
||
"total_orders": total_orders,
|
||
"total_baskets": total_baskets,
|
||
"total_favorites": total_favorites,
|
||
"total_reviews": total_reviews,
|
||
"avg_views_per_product": round(avg_views, 2),
|
||
"avg_orders_per_product": round(avg_orders, 2),
|
||
"avg_reviews_per_product": round(avg_reviews, 2),
|
||
"conversion_rate": round(conversion_rate, 2)
|
||
},
|
||
"products": top_products # Top 10 products by sales
|
||
})
|
||
|
||
metric_count += 1
|
||
if metric_count % 50 == 0:
|
||
elapsed_metric = time.time() - metric_start_time
|
||
rate = metric_count / elapsed_metric if elapsed_metric > 0 else 0
|
||
remaining = (len(filtered_keywords) - metric_count) / rate if rate > 0 else 0
|
||
print(f"⏳ İşlenen keyword: {metric_count}/{len(filtered_keywords)} ({rate:.1f} keyword/sn, ~{remaining:.0f}s kaldı)")
|
||
|
||
metric_elapsed = time.time() - metric_start_time
|
||
print(f"✅ Metrikler hesaplandı: {len(keyword_metrics)} keyword ({metric_elapsed:.2f}s)")
|
||
|
||
# Step 4: Apply advanced filters
|
||
# print(f"🔍 Gelişmiş filtreler uygulanıyor...")
|
||
filtered_metrics = keyword_metrics.copy()
|
||
|
||
# Views filter
|
||
if min_views is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_views"] >= min_views]
|
||
if max_views is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_views"] <= max_views]
|
||
|
||
# Orders filter
|
||
if min_orders is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_orders"] >= min_orders]
|
||
if max_orders is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_orders"] <= max_orders]
|
||
|
||
# Reviews filter
|
||
if min_reviews is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_reviews"] >= min_reviews]
|
||
if max_reviews is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["total_reviews"] <= max_reviews]
|
||
|
||
# Conversion rate filter
|
||
if min_conversion_rate is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["conversion_rate"] >= min_conversion_rate]
|
||
if max_conversion_rate is not None:
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["performance"]["conversion_rate"] <= max_conversion_rate]
|
||
|
||
# Average price filter (calculate from products)
|
||
if min_avg_price is not None or max_avg_price is not None:
|
||
filtered_by_price = []
|
||
for kw in filtered_metrics:
|
||
if kw["products"]:
|
||
avg_price = sum(p.get("price", 0) for p in kw["products"]) / len(kw["products"])
|
||
if min_avg_price is not None and avg_price < min_avg_price:
|
||
continue
|
||
if max_avg_price is not None and avg_price > max_avg_price:
|
||
continue
|
||
filtered_by_price.append(kw)
|
||
filtered_metrics = filtered_by_price
|
||
|
||
# Competition level filter (based on frequency)
|
||
if competition_level:
|
||
total_keywords_count = len(keyword_metrics)
|
||
if total_keywords_count > 0:
|
||
if competition_level == "low":
|
||
pass
|
||
# Bottom 33% - low competition
|
||
sorted_freqs = sorted([kw["frequency"] for kw in keyword_metrics])
|
||
threshold = sorted_freqs[int(total_keywords_count * 0.33)] if sorted_freqs else 0
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["frequency"] <= threshold]
|
||
elif competition_level == "high":
|
||
pass
|
||
# Top 33% - high competition
|
||
sorted_freqs = sorted([kw["frequency"] for kw in keyword_metrics], reverse=True)
|
||
threshold = sorted_freqs[int(total_keywords_count * 0.33)] if sorted_freqs else float('inf')
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw["frequency"] >= threshold]
|
||
# medium is between low and high, no filter needed
|
||
|
||
# Potential score filter (high conversion + low competition)
|
||
if min_potential_score is not None:
|
||
pass
|
||
# Calculate potential score: (conversion_rate * 0.6) + ((100 - normalized_frequency) * 0.4)
|
||
max_freq = max([kw["frequency"] for kw in filtered_metrics]) if filtered_metrics else 1
|
||
for kw in filtered_metrics:
|
||
normalized_freq = (kw["frequency"] / max_freq) * 100 if max_freq > 0 else 0
|
||
potential = (kw["performance"]["conversion_rate"] * 0.6) + ((100 - normalized_freq) * 0.4)
|
||
kw["potential_score"] = round(potential, 2)
|
||
filtered_metrics = [kw for kw in filtered_metrics if kw.get("potential_score", 0) >= min_potential_score]
|
||
|
||
print(f"✅ Filtreleme sonrası: {len(filtered_metrics)} keyword kaldı")
|
||
|
||
# Step 5: Sort by selected criteria
|
||
print(f"📈 Sıralama yapılıyor: {sort_by} ({sort_order})...")
|
||
reverse_order = sort_order == "desc"
|
||
|
||
if sort_by == "frequency":
|
||
filtered_metrics.sort(key=lambda x: x["frequency"], reverse=reverse_order)
|
||
elif sort_by == "views":
|
||
filtered_metrics.sort(key=lambda x: x["performance"]["total_views"], reverse=reverse_order)
|
||
elif sort_by == "orders":
|
||
filtered_metrics.sort(key=lambda x: x["performance"]["total_orders"], reverse=reverse_order)
|
||
elif sort_by == "reviews":
|
||
filtered_metrics.sort(key=lambda x: x["performance"]["total_reviews"], reverse=reverse_order)
|
||
elif sort_by == "conversion_rate":
|
||
filtered_metrics.sort(key=lambda x: x["performance"]["conversion_rate"], reverse=reverse_order)
|
||
elif sort_by == "potential_score":
|
||
pass
|
||
# Calculate if not already calculated
|
||
max_freq = max([kw["frequency"] for kw in filtered_metrics]) if filtered_metrics else 1
|
||
for kw in filtered_metrics:
|
||
if "potential_score" not in kw:
|
||
normalized_freq = (kw["frequency"] / max_freq) * 100 if max_freq > 0 else 0
|
||
kw["potential_score"] = round((kw["performance"]["conversion_rate"] * 0.6) + ((100 - normalized_freq) * 0.4), 2)
|
||
filtered_metrics.sort(key=lambda x: x.get("potential_score", 0), reverse=reverse_order)
|
||
|
||
# Pagination logic
|
||
import math
|
||
total_keywords = len(filtered_metrics)
|
||
total_pages = math.ceil(total_keywords / per_page) if per_page > 0 else 1
|
||
|
||
# Calculate offset and limit
|
||
start_index = (page - 1) * per_page
|
||
end_index = start_index + per_page
|
||
|
||
# Get paginated keywords
|
||
paginated_keywords = filtered_metrics[start_index:end_index]
|
||
print(f"✅ Sayfa {page}/{total_pages} - {len(paginated_keywords)} keyword seçildi (toplam: {total_keywords})")
|
||
|
||
# Step 6: Process rare keywords (frequency 1-2) - Limited to top 100 for performance
|
||
print(f"📊 Rare keywords işleniyor... ({len(rare_keywords)} keyword)")
|
||
rare_metrics = []
|
||
rare_count = 0
|
||
for keyword, product_ids in rare_keywords.items():
|
||
# Get category distribution and metrics (simplified version)
|
||
total_views = 0
|
||
total_orders = 0
|
||
total_baskets = 0
|
||
|
||
for pid in product_ids:
|
||
product = product_dict.get(pid)
|
||
if not product:
|
||
continue
|
||
|
||
# Get social proof
|
||
social = social_details.get(str(pid), {})
|
||
total_views += social.get("contentViewCount", 0)
|
||
total_orders += social.get("totalOrder", 0)
|
||
total_baskets += social.get("totalBasket", 0)
|
||
|
||
# Only include rare keywords that have actual sales
|
||
if total_orders > 0:
|
||
rare_metrics.append({
|
||
"keyword": keyword,
|
||
"frequency": len(product_ids),
|
||
"performance": {
|
||
"total_views": total_views,
|
||
"total_orders": total_orders,
|
||
"total_baskets": total_baskets,
|
||
"conversion_rate": round((total_orders / total_views * 100) if total_views > 0 else 0, 2)
|
||
}
|
||
})
|
||
|
||
rare_count += 1
|
||
if rare_count >= 100: # Limit to 100 rare keywords for performance
|
||
break
|
||
|
||
# Sort rare keywords by orders (most promising first)
|
||
rare_metrics.sort(key=lambda x: x["performance"]["total_orders"], reverse=True)
|
||
print(f"✅ Rare keywords işlendi: {len(rare_metrics)} keyword (top 100)")
|
||
|
||
# Step 7: Build category × keyword matrix
|
||
category_keyword_matrix = {}
|
||
for kw_data in paginated_keywords:
|
||
keyword = kw_data["keyword"]
|
||
for cat, count in kw_data["categories"].items():
|
||
if cat not in category_keyword_matrix:
|
||
category_keyword_matrix[cat] = {}
|
||
category_keyword_matrix[cat][keyword] = count
|
||
|
||
# Step 7: Calculate insights
|
||
most_frequent = paginated_keywords[:10] if paginated_keywords else []
|
||
best_performing = sorted(
|
||
keyword_metrics,
|
||
key=lambda x: x["performance"]["total_orders"],
|
||
reverse=True
|
||
)[:10]
|
||
|
||
result = {
|
||
"total_keywords": total_keywords,
|
||
"total_products_analyzed": len(all_products),
|
||
"keywords": paginated_keywords,
|
||
"rare_keywords": rare_metrics, # NEW: Rare keywords (frequency 1-2)
|
||
"total_rare_keywords": len(rare_keywords), # NEW: Total count of rare keywords
|
||
"category_keyword_matrix": category_keyword_matrix,
|
||
"pagination": {
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_pages": total_pages,
|
||
"total_items": total_keywords,
|
||
"has_next": page < total_pages,
|
||
"has_previous": page > 1
|
||
},
|
||
"insights": {
|
||
"most_frequent_keywords": [{"keyword": kw["keyword"], "frequency": kw["frequency"]} for kw in most_frequent],
|
||
"best_performing_keywords": [{"keyword": kw["keyword"], "total_orders": kw["performance"]["total_orders"]} for kw in best_performing]
|
||
}
|
||
}
|
||
|
||
print(f"✅ ========== KEYWORD ANALYSIS COMPLETED ==========")
|
||
print(f"📊 Sonuç: {result['total_keywords']} common keywords, {result['total_rare_keywords']} rare keywords, {result['total_products_analyzed']} ürün")
|
||
print(f"📄 Sayfa {page}/{total_pages} - {len(result['keywords'])} keyword gösteriliyor, {len(result['rare_keywords'])} rare keyword")
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
error_trace = traceback.format_exc()
|
||
print(f"❌ Keyword analysis error: {str(e)}")
|
||
print(f"Traceback: {error_trace}")
|
||
return {"error": str(e), "traceback": error_trace, "note": "Failed to generate keyword analysis"}
|
||
|
||
|
||
# ============================================================================
|
||
# PRODUCT FINDER
|
||
# ============================================================================
|
||
|
||
@app.get("/api/reports/{report_id}/product-finder")
|
||
def product_finder(
|
||
report_id: int,
|
||
keywords: Optional[str] = None,
|
||
keyword_logic: str = "and", # and, or, not
|
||
min_views: Optional[int] = None,
|
||
max_views: Optional[int] = None,
|
||
min_orders: Optional[int] = None,
|
||
max_orders: Optional[int] = None,
|
||
min_reviews: Optional[int] = None,
|
||
max_reviews: Optional[int] = None,
|
||
min_conversion_rate: Optional[float] = None,
|
||
max_conversion_rate: Optional[float] = None,
|
||
min_price: Optional[float] = None,
|
||
max_price: Optional[float] = None,
|
||
min_rating: Optional[float] = None,
|
||
max_rating: Optional[float] = None,
|
||
min_rating_count: Optional[int] = None,
|
||
max_rating_count: Optional[int] = None,
|
||
categories: Optional[str] = None, # comma-separated
|
||
brands: Optional[str] = None, # comma-separated
|
||
origin_countries: Optional[str] = None, # comma-separated
|
||
competition_level: Optional[str] = None,
|
||
min_potential_score: Optional[float] = None,
|
||
sort_by: str = "orders",
|
||
sort_order: str = "desc",
|
||
page: int = 1,
|
||
per_page: int = 50,
|
||
db: Session = Depends(get_db)
|
||
):
|
||
"""
|
||
Gelişmiş filtrelerle ürün arama ve bulma.
|
||
|
||
Args:
|
||
report_id: Rapor ID
|
||
keywords: Virgülle ayrılmış keyword listesi
|
||
keyword_logic: "and", "or", "not"
|
||
... (diğer filtreler)
|
||
page: Sayfa numarası
|
||
per_page: Sayfa başına ürün sayısı
|
||
|
||
Returns:
|
||
Filtrelenmiş ürün listesi
|
||
"""
|
||
# print(f"🔍 ========== PRODUCT FINDER REQUEST ==========")
|
||
print(f"📋 Report ID: {report_id}, Page: {page}, Per Page: {per_page}")
|
||
|
||
try:
|
||
pass
|
||
# Load products
|
||
all_products, categories_data = load_report_products(db, report_id)
|
||
print(f"✅ {len(all_products)} ürün yüklendi")
|
||
|
||
if not all_products:
|
||
return {
|
||
"total_products": 0,
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_pages": 0,
|
||
"products": []
|
||
}
|
||
|
||
# Load social proof data
|
||
social_data = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json")
|
||
social_details = social_data.get("details", {}) if social_data else {}
|
||
print(f"✅ Social proof data yüklendi: {len(social_details)} ürün")
|
||
|
||
# Create product lookup dict
|
||
product_dict = {p.get("id"): p for p in all_products if p.get("id")}
|
||
|
||
# Filter products
|
||
filtered_products = []
|
||
|
||
# Parse filter lists
|
||
category_list = [c.strip() for c in categories.split(",")] if categories else []
|
||
brand_list = [b.strip() for b in brands.split(",")] if brands else []
|
||
origin_list = [o.strip() for o in origin_countries.split(",")] if origin_countries else []
|
||
keyword_list = [k.strip().lower() for k in keywords.split(",")] if keywords else []
|
||
|
||
# Calculate competition levels (category-based)
|
||
category_product_counts = {}
|
||
for product in all_products:
|
||
category = product.get("category", {})
|
||
if isinstance(category, dict):
|
||
cat_name = category.get("name", "Uncategorized")
|
||
else:
|
||
cat_name = category if category else "Uncategorized"
|
||
category_product_counts[cat_name] = category_product_counts.get(cat_name, 0) + 1
|
||
|
||
for product in all_products:
|
||
product_id = product.get("id")
|
||
if not product_id:
|
||
continue
|
||
|
||
# Get social proof
|
||
social = social_details.get(str(product_id), {})
|
||
views = social.get("page_views", 0) or 0
|
||
orders = social.get("orders", 0) or 0
|
||
baskets = social.get("baskets", 0) or 0
|
||
favorites = social.get("favorites", 0) or 0
|
||
|
||
# Get review count
|
||
review_count = 0
|
||
if product.get("rating_count"):
|
||
review_count = int(product.get("rating_count", 0) or 0)
|
||
elif product.get("rating"):
|
||
rating_obj = product.get("rating")
|
||
if isinstance(rating_obj, dict):
|
||
review_count = int(rating_obj.get("totalComments", 0) or rating_obj.get("totalCount", 0) or 0)
|
||
|
||
# Get rating
|
||
rating = 0
|
||
if product.get("rating"):
|
||
rating_obj = product.get("rating")
|
||
if isinstance(rating_obj, dict):
|
||
rating = float(rating_obj.get("averageRating", 0) or 0)
|
||
elif isinstance(rating_obj, (int, float)):
|
||
rating = float(rating_obj)
|
||
|
||
# Get price
|
||
price = 0
|
||
if product.get("price"):
|
||
price_obj = product.get("price")
|
||
if isinstance(price_obj, dict):
|
||
price = float(price_obj.get("sellingPrice", 0) or 0)
|
||
elif isinstance(price_obj, (int, float)):
|
||
price = float(price_obj)
|
||
|
||
# Get category
|
||
category = product.get("category", {})
|
||
if isinstance(category, dict):
|
||
category_name = category.get("name", "Uncategorized")
|
||
else:
|
||
category_name = category if category else "Uncategorized"
|
||
|
||
# Get brand
|
||
brand = product.get("brand", {})
|
||
if isinstance(brand, dict):
|
||
brand_name = brand.get("name", "Unknown")
|
||
else:
|
||
brand_name = brand if brand else "Unknown"
|
||
|
||
# Get origin country
|
||
origin_country = product.get("origin_country", "Unknown")
|
||
|
||
# Calculate conversion rate
|
||
conversion_rate = (orders / views * 100) if views > 0 else 0
|
||
|
||
# Keyword filter
|
||
if keyword_list:
|
||
product_name = (product.get("name", "") or "").lower()
|
||
if keyword_logic == "and":
|
||
if not all(kw in product_name for kw in keyword_list):
|
||
continue
|
||
elif keyword_logic == "or":
|
||
if not any(kw in product_name for kw in keyword_list):
|
||
continue
|
||
elif keyword_logic == "not":
|
||
if any(kw in product_name for kw in keyword_list):
|
||
continue
|
||
|
||
# Views filter
|
||
if min_views is not None and views < min_views:
|
||
continue
|
||
if max_views is not None and views > max_views:
|
||
continue
|
||
|
||
# Orders filter
|
||
if min_orders is not None and orders < min_orders:
|
||
continue
|
||
if max_orders is not None and orders > max_orders:
|
||
continue
|
||
|
||
# Reviews filter
|
||
if min_reviews is not None and review_count < min_reviews:
|
||
continue
|
||
if max_reviews is not None and review_count > max_reviews:
|
||
continue
|
||
|
||
# Conversion rate filter
|
||
if min_conversion_rate is not None and conversion_rate < min_conversion_rate:
|
||
continue
|
||
if max_conversion_rate is not None and conversion_rate > max_conversion_rate:
|
||
continue
|
||
|
||
# Price filter
|
||
if min_price is not None and price < min_price:
|
||
continue
|
||
if max_price is not None and price > max_price:
|
||
continue
|
||
|
||
# Rating filter
|
||
if min_rating is not None and rating < min_rating:
|
||
continue
|
||
if max_rating is not None and rating > max_rating:
|
||
continue
|
||
|
||
# Rating count filter
|
||
if min_rating_count is not None and review_count < min_rating_count:
|
||
continue
|
||
if max_rating_count is not None and review_count > max_rating_count:
|
||
continue
|
||
|
||
# Category filter
|
||
if category_list and category_name not in category_list:
|
||
continue
|
||
|
||
# Brand filter
|
||
if brand_list and brand_name not in brand_list:
|
||
continue
|
||
|
||
# Origin country filter
|
||
if origin_list and origin_country not in origin_list:
|
||
continue
|
||
|
||
# Competition level filter
|
||
if competition_level:
|
||
cat_count = category_product_counts.get(category_name, 0)
|
||
total_cat_count = len(category_product_counts)
|
||
if total_cat_count > 0:
|
||
sorted_counts = sorted(category_product_counts.values())
|
||
low_threshold = sorted_counts[int(total_cat_count * 0.33)] if sorted_counts else 0
|
||
high_threshold = sorted_counts[int(total_cat_count * 0.67)] if sorted_counts else float('inf')
|
||
|
||
if competition_level == "low" and cat_count > low_threshold:
|
||
continue
|
||
elif competition_level == "high" and cat_count < high_threshold:
|
||
continue
|
||
|
||
# Potential score filter
|
||
if min_potential_score is not None:
|
||
max_cat_count = max(category_product_counts.values()) if category_product_counts else 1
|
||
normalized_competition = (cat_count / max_cat_count) * 100 if max_cat_count > 0 else 0
|
||
potential = (conversion_rate * 0.6) + ((100 - normalized_competition) * 0.4)
|
||
if potential < min_potential_score:
|
||
continue
|
||
|
||
# Add product to results
|
||
filtered_products.append({
|
||
"id": product_id,
|
||
"name": product.get("name", ""),
|
||
"category": category_name,
|
||
"brand": brand_name,
|
||
"price": round(price, 2),
|
||
"rating": round(rating, 2),
|
||
"rating_count": review_count,
|
||
"views": views,
|
||
"orders": orders,
|
||
"reviews": review_count,
|
||
"baskets": baskets,
|
||
"favorites": favorites,
|
||
"conversion_rate": round(conversion_rate, 2),
|
||
"origin_country": origin_country,
|
||
"image_url": product.get("imageUrl", ""),
|
||
"barcode": product.get("barcode", "")
|
||
})
|
||
|
||
print(f"✅ Filtreleme sonrası: {len(filtered_products)} ürün kaldı")
|
||
|
||
# Sort products
|
||
reverse_order = sort_order == "desc"
|
||
if sort_by == "views":
|
||
filtered_products.sort(key=lambda x: x["views"], reverse=reverse_order)
|
||
elif sort_by == "orders":
|
||
filtered_products.sort(key=lambda x: x["orders"], reverse=reverse_order)
|
||
elif sort_by == "reviews":
|
||
filtered_products.sort(key=lambda x: x["reviews"], reverse=reverse_order)
|
||
elif sort_by == "conversion_rate":
|
||
filtered_products.sort(key=lambda x: x["conversion_rate"], reverse=reverse_order)
|
||
elif sort_by == "price":
|
||
filtered_products.sort(key=lambda x: x["price"], reverse=reverse_order)
|
||
elif sort_by == "rating":
|
||
filtered_products.sort(key=lambda x: x["rating"], reverse=reverse_order)
|
||
|
||
# Pagination
|
||
total_products = len(filtered_products)
|
||
total_pages = (total_products + per_page - 1) // per_page
|
||
start_idx = (page - 1) * per_page
|
||
end_idx = start_idx + per_page
|
||
paginated_products = filtered_products[start_idx:end_idx]
|
||
|
||
print(f"✅ Sayfalama: {len(paginated_products)} ürün gösteriliyor (sayfa {page}/{total_pages})")
|
||
|
||
return {
|
||
"total_products": total_products,
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_pages": total_pages,
|
||
"products": paginated_products
|
||
}
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
error_trace = traceback.format_exc()
|
||
print(f"❌ Product finder error: {str(e)}")
|
||
print(f"Traceback: {error_trace}")
|
||
return {
|
||
"error": str(e),
|
||
"total_products": 0,
|
||
"page": page,
|
||
"per_page": per_page,
|
||
"total_pages": 0,
|
||
"products": []
|
||
}
|
||
|
||
|
||
# DISABLED: Questions feature removed per user request
|
||
# @app.get("/api/reports/{report_id}/questions-summary")
|
||
# def questions_summary(report_id: int, refresh: bool = False, db: Session = Depends(get_db)):
|
||
# # Try persistent cache first
|
||
# if not refresh:
|
||
# persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/questions.json")
|
||
# if persisted:
|
||
# return {"source": "file", **persisted}
|
||
# cache_key = f"{report_id}"
|
||
# if not refresh and cache_key in questions_cache:
|
||
# return {"source": "cache", **questions_cache[cache_key]}
|
||
#
|
||
# try:
|
||
# all_products, _ = load_report_products(db, report_id)
|
||
# product_ids = [p.get("id") for p in all_products if p.get("id")]
|
||
# summary = {"total_products": len(product_ids), "processed": 0, "total_questions": 0, "top_topics": {}, "sample": []}
|
||
#
|
||
# for pid in product_ids:
|
||
# data = fetch_merchant_questions(pid, page=0, page_size=5)
|
||
# if not data:
|
||
# _log_enrichment_error(db, report_id=report_id, product_id=int(pid), merchant_id=None,
|
||
# endpoint="questions", error_type="request_failed",
|
||
# message="questions request failed or timed out")
|
||
# continue
|
||
# summary["processed"] += 1
|
||
# res = data.get("result") or {}
|
||
# summary_data = res.get("summary", {})
|
||
#
|
||
# # Total questions - API'de summary.totalCount olarak geliyor
|
||
# total_q = summary_data.get("totalCount") or res.get("totalElements") or res.get("total") or 0
|
||
# summary["total_questions"] += total_q
|
||
#
|
||
# # Questions - API'de "questions" olarak geliyor (content değil)
|
||
# questions = res.get("questions", []) or res.get("content", [])
|
||
# for q in questions[:2]:
|
||
# # Question text - API'de "text" ve "originalText" var
|
||
# title = q.get("text") or q.get("originalText") or q.get("title") or q.get("question") or ""
|
||
#
|
||
# # Answer - API'de nested answer object var
|
||
# answer_obj = q.get("answer", {})
|
||
# if isinstance(answer_obj, dict):
|
||
# answer = answer_obj.get("text") or answer_obj.get("originalText") or ""
|
||
# else:
|
||
# answer = str(answer_obj) if answer_obj else ""
|
||
#
|
||
# # Topic - demo'da tag'ler var ama her soru için yok
|
||
# topic = q.get("topic") or q.get("category") or "Genel"
|
||
# summary["top_topics"][topic] = summary["top_topics"].get(topic, 0) + 1
|
||
# summary["sample"].append({"product_id": pid, "title": title[:120], "answer": answer[:160]})
|
||
# time.sleep(0.01)
|
||
#
|
||
# top_topics = sorted([{ "topic": k, "count": v } for k, v in summary["top_topics"].items()], key=lambda x: x["count"], reverse=True)[:30]
|
||
# result = {"summary": {**summary, "top_topics": top_topics}}
|
||
# questions_cache[cache_key] = result
|
||
# return result
|
||
# except Exception as e:
|
||
# return {"error": str(e), "note": "Network or parsing issue", "summary": {}}
|
||
|
||
|
||
# DISABLED: Similar products feature removed per user request
|
||
# @app.get("/api/reports/{report_id}/similar-summary")
|
||
# def similar_summary(report_id: int, refresh: bool = False, db: Session = Depends(get_db)):
|
||
# # Try persistent cache first
|
||
# if not refresh:
|
||
# persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/similar.json")
|
||
# if persisted:
|
||
# return {"source": "file", **persisted}
|
||
# cache_key = f"{report_id}"
|
||
# if not refresh and cache_key in similar_cache:
|
||
# return {"source": "cache", **similar_cache[cache_key]}
|
||
#
|
||
# try:
|
||
# all_products, _ = load_report_products(db, report_id)
|
||
# product_ids = [p.get("id") for p in all_products if p.get("id")]
|
||
# counter = {}
|
||
# examples = []
|
||
# for pid in product_ids:
|
||
# data = fetch_similar_products(pid, page=0, page_size=8)
|
||
# if not data:
|
||
# _log_enrichment_error(db, report_id=report_id, product_id=int(pid), merchant_id=None,
|
||
# endpoint="similar", error_type="request_failed",
|
||
# message="similar request failed or timed out")
|
||
# continue
|
||
# res = data.get("result") or data
|
||
# items = res.get("products") or res.get("items") or []
|
||
# sim_ids = []
|
||
# for it in items:
|
||
# sid = it.get("id") or it.get("productId")
|
||
# if sid:
|
||
# sidi = int(sid)
|
||
# sim_ids.append(sidi)
|
||
# counter[sidi] = counter.get(sidi, 0) + 1
|
||
# if sim_ids:
|
||
# examples.append({"product_id": pid, "similar": sim_ids[:5]})
|
||
# time.sleep(0.01)
|
||
#
|
||
# top_similars = sorted([{ "product_id": k, "count": v } for k, v in counter.items()], key=lambda x: x["count"], reverse=True)[:50]
|
||
# result = {"top_similar_products": top_similars, "examples": examples[:50]}
|
||
# similar_cache[cache_key] = result
|
||
# return result
|
||
# except Exception as e:
|
||
# return {"error": str(e), "note": "Network or parsing issue", "top_similar_products": []}
|
||
|
||
|
||
# DISABLED: Followers feature removed per user request
|
||
# @app.get("/api/reports/{report_id}/merchants/followers")
|
||
# def merchant_followers(report_id: int, refresh: bool = False, db: Session = Depends(get_db)):
|
||
# # Try persistent cache first
|
||
# if not refresh:
|
||
# persisted = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/followers.json")
|
||
# if persisted:
|
||
# return {"source": "file", **persisted}
|
||
# cache_key = f"{report_id}"
|
||
# if not refresh and cache_key in followers_cache:
|
||
# return {"source": "cache", **followers_cache[cache_key]}
|
||
#
|
||
# try:
|
||
# all_products, _ = load_report_products(db, report_id)
|
||
# merchant_ids = []
|
||
# for p in all_products:
|
||
# merchant_listings = p.get("merchantListings", [])
|
||
# if merchant_listings:
|
||
# merchant = merchant_listings[0].get("merchant", {})
|
||
# mid = merchant.get("id")
|
||
# if mid:
|
||
# merchant_ids.append(int(mid))
|
||
# merchant_ids = sorted(list(set(merchant_ids)))
|
||
#
|
||
# details = {}
|
||
# for mid in merchant_ids:
|
||
# data = fetch_merchant_followers(mid)
|
||
# count = 0
|
||
# if data:
|
||
# res = data.get("result") or data
|
||
# count = res.get("followerCount") or res.get("count") or 0
|
||
# else:
|
||
# _log_enrichment_error(db, report_id=report_id, product_id=None, merchant_id=int(mid),
|
||
# endpoint="followers", error_type="request_failed",
|
||
# message="followers request failed or timed out")
|
||
# details[mid] = {"followers": _safe_int(count)}
|
||
# time.sleep(0.01)
|
||
#
|
||
# # top merchants by followers
|
||
# top_merchants = sorted([
|
||
# {"merchant_id": mid, "followers": info["followers"]}
|
||
# for mid, info in details.items()
|
||
# ], key=lambda x: x["followers"], reverse=True)[:50]
|
||
#
|
||
# result = {"total_merchants": len(merchant_ids), "details": details, "top_merchants": top_merchants}
|
||
# followers_cache[cache_key] = result
|
||
# return result
|
||
# except Exception as e:
|
||
# return {"error": str(e), "note": "Network or parsing issue", "details": {}, "top_merchants": []}
|
||
|
||
|
||
# ============================================================================
|
||
# ENRICHMENT MOTORU (Arka Plan Toplama + Kalıcı Cache)
|
||
# ============================================================================
|
||
|
||
# enrichment_progress already defined at line 343 as BoundedCache
|
||
|
||
def _enrich_build_product_info(all_products):
|
||
info = {}
|
||
for p in all_products:
|
||
pid = p.get("id")
|
||
if not pid:
|
||
continue
|
||
name = (p.get("name") or "Unknown")
|
||
brand = p.get("brand", {}).get("name")
|
||
display = f"{brand} - {name}" if brand else name
|
||
info[int(pid)] = {"name": display[:120]}
|
||
return info
|
||
|
||
|
||
def _enrich_report_task(report_id: int):
|
||
import time
|
||
db = SessionLocal()
|
||
try:
|
||
enrichment_progress[report_id] = {"status": "running", "step": "init", "done": 0, "total": 2}
|
||
all_products, _ = load_report_products(db, report_id)
|
||
product_ids = [p.get("id") for p in all_products if p.get("id")]
|
||
product_info = _enrich_build_product_info(all_products)
|
||
|
||
base_dir = f"{REPORTS_DIR}/enrich_{report_id}"
|
||
_ensure_dir(base_dir)
|
||
|
||
# DISABLED: Reviews feature removed per user request
|
||
# 1) Reviews - REMOVED
|
||
# enrichment_progress[report_id] = {"status": "running", "step": "reviews", "done": 0, "total": 2}
|
||
# rev_payload = reviews_summary(report_id, refresh=True, db=db) or {}
|
||
# _save_json(f"{base_dir}/reviews.json", rev_payload)
|
||
# time.sleep(0.1)
|
||
|
||
# 2) Social Proof
|
||
enrichment_progress[report_id] = {"status": "running", "step": "social", "done": 0, "total": 1}
|
||
soc_payload = social_proof(report_id, refresh=True, db=db) or {}
|
||
# ürün isimlerini detaylara iliştir
|
||
if soc_payload and soc_payload.get("details"):
|
||
details = soc_payload["details"]
|
||
enriched_details = {int(pid): {**vals, **product_info.get(int(pid), {})} for pid, vals in details.items()}
|
||
soc_payload["details"] = enriched_details
|
||
_save_json(f"{base_dir}/social.json", soc_payload)
|
||
time.sleep(0.1)
|
||
|
||
# DISABLED: Questions, similar products, and followers removed per user request
|
||
# # 3) Questions
|
||
# enrichment_progress[report_id] = {"status": "running", "step": "questions", "done": 2, "total": 5}
|
||
# q_payload = questions_summary(report_id, refresh=True, db=db) or {}
|
||
# _save_json(f"{base_dir}/questions.json", q_payload)
|
||
# time.sleep(0.1)
|
||
#
|
||
# # 4) Similar
|
||
# enrichment_progress[report_id] = {"status": "running", "step": "similar", "done": 3, "total": 5}
|
||
# s_payload = similar_summary(report_id, refresh=True, db=db) or {}
|
||
# _save_json(f"{base_dir}/similar.json", s_payload)
|
||
# time.sleep(0.1)
|
||
#
|
||
# # 5) Followers
|
||
# enrichment_progress[report_id] = {"status": "running", "step": "followers", "done": 4, "total": 5}
|
||
# f_payload = merchant_followers(report_id, refresh=True, db=db) or {}
|
||
# _save_json(f"{base_dir}/followers.json", f_payload)
|
||
# time.sleep(0.1)
|
||
|
||
enrichment_progress[report_id] = {"status": "completed", "step": "done", "done": 2, "total": 2}
|
||
except Exception as e:
|
||
enrichment_progress[report_id] = {"status": "error", "error": str(e)}
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
@app.post("/api/reports/{report_id}/enrich/start")
|
||
def start_enrichment(report_id: int, background: BackgroundTasks):
|
||
enrichment_progress[report_id] = {"status": "queued", "step": "queued"}
|
||
background.add_task(_enrich_report_task, report_id)
|
||
return {"status": "started", "report_id": report_id}
|
||
|
||
|
||
@app.get("/api/reports/{report_id}/enrich/status")
|
||
def enrichment_status(report_id: int):
|
||
return enrichment_progress.get(report_id, {"status": "unknown"})
|
||
|
||
|
||
# ============================================================================
|
||
# HIDDEN CHAMPIONS ENDPOINT
|
||
# ============================================================================
|
||
|
||
@app.get("/api/reports/{report_id}/hidden-champions")
|
||
def get_hidden_champions(
|
||
report_id: int,
|
||
min_rating: float = 4.5,
|
||
max_review_count: int = 50,
|
||
social_multiplier: float = 1.5,
|
||
min_score: int = 60,
|
||
min_orders: int = 1, # Minimum satış sayısı (satış verisi çok önemli)
|
||
limit: int = 50,
|
||
db: Session = Depends(get_db)
|
||
):
|
||
"""
|
||
Gizli şampiyonları bul (özelleştirilmiş filtreler)
|
||
Rating >= 4.5, Yorum < 50, Orders >= 1, Social Proof kategorinin 1.5 katı üzerinde
|
||
"""
|
||
try:
|
||
from analytics.champion_finder import HiddenChampionFinder
|
||
|
||
all_products, categories_data = load_report_products(db, report_id)
|
||
social_data = _load_json(f"{REPORTS_DIR}/enrich_{report_id}/social.json") or {}
|
||
|
||
finder = HiddenChampionFinder()
|
||
|
||
filters = {
|
||
"min_rating": min_rating,
|
||
"max_review_count": max_review_count,
|
||
"social_multiplier": social_multiplier,
|
||
"min_score": min_score,
|
||
"min_orders": min_orders, # Minimum satış kontrolü
|
||
"limit": limit
|
||
}
|
||
|
||
# Sync olarak çalıştır
|
||
result = finder.find(all_products, social_data, filters)
|
||
|
||
return result
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
return {
|
||
"error": str(e),
|
||
"traceback": traceback.format_exc(),
|
||
"total_found": 0,
|
||
"hidden_champions": []
|
||
}
|
||
|
||
|
||
# ============================================================================
|
||
# ANALYTICS TEST ENDPOINT
|
||
# ============================================================================
|
||
|
||
@app.get("/api/reports/{report_id}/test-analytics")
|
||
def test_analytics(report_id: int, db: Session = Depends(get_db)):
|
||
"""
|
||
Test endpoint: HHI Index ve Risk Skoru hesaplama testi
|
||
Mevcut bir raporun analytics metriklerini test eder
|
||
"""
|
||
from analytics.metrics import (
|
||
calculate_hhi_index,
|
||
calculate_competition_score_from_hhi,
|
||
get_rating_value,
|
||
get_review_count
|
||
)
|
||
import os
|
||
import json
|
||
|
||
try:
|
||
pass
|
||
# 1. Raporu yükle
|
||
report = db.query(Report).filter(Report.id == report_id).first()
|
||
if not report:
|
||
raise HTTPException(status_code=404, detail="Report not found")
|
||
|
||
# 2. Ürünleri yükle
|
||
all_products, categories_data = load_report_products(db, report_id)
|
||
|
||
if not all_products:
|
||
return {
|
||
"error": "No products found in report",
|
||
"report_id": report_id,
|
||
"report_name": report.name
|
||
}
|
||
|
||
# 3. HHI Index hesapla
|
||
hhi_index = calculate_hhi_index(all_products)
|
||
|
||
# 4. Rekabet skoru hesapla
|
||
competition_score = calculate_competition_score_from_hhi(hhi_index)
|
||
|
||
# 5. Marka dağılımı (Counter ile optimize edildi)
|
||
from collections import Counter
|
||
brand_names = [
|
||
p.get("brand", {}).get("name", "Unknown")
|
||
for p in all_products
|
||
if p.get("brand", {}).get("name")
|
||
]
|
||
brand_counts = Counter(brand_names)
|
||
|
||
top_brands = brand_counts.most_common(10)
|
||
|
||
# 6. Basit risk skoru (sadece rekabet bazlı)
|
||
# Bu basit bir test, tam risk skoru için risk_analyzer.py gerekli
|
||
risk_level = "DÜŞÜK" if competition_score >= 70 else "ORTA" if competition_score >= 50 else "YÜKSEK"
|
||
|
||
# 7. Ek metrikler
|
||
total_products = len(all_products)
|
||
total_brands = len(brand_counts)
|
||
|
||
# Ortalama rating
|
||
ratings = [get_rating_value(p) for p in all_products if get_rating_value(p) > 0]
|
||
avg_rating = sum(ratings) / len(ratings) if ratings else 0
|
||
|
||
# Ortalama fiyat
|
||
prices = [
|
||
p.get("price", {}).get("sellingPrice", 0)
|
||
for p in all_products
|
||
if p.get("price", {}).get("sellingPrice", 0) > 0
|
||
]
|
||
avg_price = sum(prices) / len(prices) if prices else 0
|
||
|
||
# 8. HHI yorumu ve stratejik tavsiye
|
||
if hhi_index < 1500:
|
||
hhi_comment = "Düşük konsantrasyon (rekabetçi pazar)"
|
||
strategic_advice = "Marka sadakati yok, SEO ve doğru fiyatlama ile pazar payı kapma potansiyeli yüksek!"
|
||
market_opportunity_score = round(100 - (hhi_index / 15), 2)
|
||
elif hhi_index < 2500:
|
||
hhi_comment = "Orta konsantrasyon"
|
||
strategic_advice = "Orta seviye rekabet var, marka gücü ve fiyat stratejisi önemli."
|
||
market_opportunity_score = round(70 - ((hhi_index - 1500) / 10), 2)
|
||
else:
|
||
hhi_comment = "Yüksek konsantrasyon (tekelci pazar)"
|
||
strategic_advice = "Güçlü markalar hakim, giriş için yüksek yatırım gerekli."
|
||
market_opportunity_score = round(max(0, 55 - ((hhi_index - 2500) / 50)), 2)
|
||
|
||
# Top 10 marka toplam payı
|
||
top_10_total_share = sum((count / total_products) * 100 for _, count in top_brands)
|
||
|
||
# Stratejik yorum (HHI düşükse)
|
||
if hhi_index < 100 and top_10_total_share < 20:
|
||
strategic_advice = f"Marka sadakati yok, SEO ve doğru fiyatlama ile pazar payı kapma potansiyeli %{market_opportunity_score:.0f}!"
|
||
|
||
return {
|
||
"success": True,
|
||
"report_id": report_id,
|
||
"report_name": report.name,
|
||
"metrics": {
|
||
"total_products": total_products,
|
||
"total_brands": total_brands,
|
||
"avg_rating": round(avg_rating, 2),
|
||
"avg_price": round(avg_price, 2)
|
||
},
|
||
"hhi_analysis": {
|
||
"hhi_index": hhi_index,
|
||
"comment": hhi_comment,
|
||
"competition_score": round(competition_score, 2),
|
||
"risk_level": risk_level,
|
||
"strategic_advice": strategic_advice,
|
||
"market_opportunity_score": market_opportunity_score,
|
||
"top_10_market_share": round(top_10_total_share, 2)
|
||
},
|
||
"top_brands": [
|
||
{
|
||
"brand": brand,
|
||
"product_count": count,
|
||
"market_share": round((count / total_products) * 100, 2)
|
||
}
|
||
for brand, count in top_brands
|
||
],
|
||
"market_share_validation": {
|
||
"top_10_total_share": round(sum((count / total_products) * 100 for _, count in top_brands), 2),
|
||
"all_brands_total_share": 100.0, # Doğrulama için
|
||
"total_brands_count": total_brands
|
||
},
|
||
"test_result": {
|
||
"hhi_calculated": hhi_index > 0,
|
||
"competition_score_calculated": competition_score >= 0,
|
||
"status": "✅ Test başarılı" if hhi_index > 0 and competition_score >= 0 else "❌ Test başarısız"
|
||
}
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
return {
|
||
"success": False,
|
||
"error": str(e),
|
||
"traceback": traceback.format_exc(),
|
||
"report_id": report_id
|
||
}
|
||
|
||
|
||
# ============================================================================
|
||
# GOOGLE TRENDS & TRAFFIC SOURCES ENDPOINTS
|
||
# ============================================================================
|
||
|
||
@app.get("/api/products/traffic-sources")
|
||
async def get_traffic_sources_bulk(product_names: str):
|
||
"""
|
||
Get estimated traffic sources for multiple products
|
||
|
||
Args:
|
||
product_names: Comma-separated product names
|
||
|
||
Returns:
|
||
Dict with traffic source estimates for each product
|
||
"""
|
||
try:
|
||
# Split product names
|
||
names_list = [name.strip() for name in product_names.split(',')]
|
||
|
||
results = {}
|
||
for product_name in names_list[:10]: # Limit to 10 products at once
|
||
if not product_name:
|
||
continue
|
||
|
||
# Use default social proof values (will be replaced with real data in frontend)
|
||
traffic_data = estimate_traffic_sources(
|
||
product_name=product_name,
|
||
instagram_views=0,
|
||
tiktok_views=0,
|
||
twitter_shares=0
|
||
)
|
||
|
||
results[product_name] = traffic_data
|
||
|
||
return {
|
||
'success': True,
|
||
'results': results,
|
||
'total': len(results)
|
||
}
|
||
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/api/products/traffic-sources/estimate")
|
||
async def estimate_product_traffic_sources(request_data: dict):
|
||
"""
|
||
Estimate traffic sources for a single product with social proof data
|
||
|
||
Request Body:
|
||
{
|
||
"product_name": "Casio Edifice Kol Saati",
|
||
"instagram_views": 10000,
|
||
"tiktok_views": 5000,
|
||
"twitter_shares": 500
|
||
}
|
||
|
||
Returns:
|
||
Traffic source percentage distribution
|
||
"""
|
||
try:
|
||
product_name = request_data.get('product_name', '')
|
||
instagram_views = request_data.get('instagram_views', 0)
|
||
tiktok_views = request_data.get('tiktok_views', 0)
|
||
twitter_shares = request_data.get('twitter_shares', 0)
|
||
|
||
if not product_name:
|
||
raise HTTPException(status_code=400, detail="product_name is required")
|
||
|
||
# Estimate traffic sources
|
||
traffic_data = estimate_traffic_sources(
|
||
product_name=product_name,
|
||
instagram_views=instagram_views,
|
||
tiktok_views=tiktok_views,
|
||
twitter_shares=twitter_shares
|
||
)
|
||
|
||
return {
|
||
'success': True,
|
||
'product_name': product_name,
|
||
'traffic_sources': traffic_data
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/api/google-trends/test")
|
||
async def test_google_trends(product_name: str = "iPhone 15"):
|
||
"""
|
||
Test endpoint for Google Trends API
|
||
|
||
Args:
|
||
product_name: Product name to search (default: iPhone 15)
|
||
|
||
Returns:
|
||
Google Trends data
|
||
"""
|
||
try:
|
||
trends_data = fetch_google_trends(product_name)
|
||
|
||
return {
|
||
'success': True,
|
||
'product_name': product_name,
|
||
'trends_data': trends_data
|
||
}
|
||
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
if __name__ == "__main__":
|
||
import uvicorn
|
||
uvicorn.run(app, host="0.0.0.0", port=8001)
|