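# NOTE(review): the imports and functions below (through parse_manual_data)
# are all commented out, so none of this code runs or is importable.
# Confirm whether it has been superseded and can be deleted, or should be restored.
# import json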
# import traceback
# import re
# from bs4 import BeautifulSoup
# from django.utils.timezone import now
# from extractly.models import NetworkSourceError, NetworkMonitoredPage, ManualDataSource
# from manual_agregator.notifications import send_alert_notification

# def _resolve_missing_bool(config):
#     option = config.get("ifMissing", "null")
#     if option == "true":
#         return True
#     elif option == "false":
#         return False
#     return None

# def _resolve_missing_text(config):
#     if config.get("ifMissing") == "default":
#         return config.get("defaultValue", "")
#     return None

# def parse_currency(label_or_text):
#     """Standardizes and detects currency code (PLN, EUR, USD, GBP, etc)."""
#     if not label_or_text:
#         return None
#     text = label_or_text.strip().lower().replace(" ", "")

#     CURRENCY_MAP = {
#         # PLN
#         "zł": "PLN", "pln": "PLN", "zl": "PLN", "zł.": "PLN",
#         # EUR
#         "eur": "EUR", "euro": "EUR", "€": "EUR",
#         # USD
#         "usd": "USD", "dolar": "USD", "$": "USD", "us$": "USD",
#         # GBP
#         "gbp": "GBP", "funt": "GBP", "£": "GBP",
#         # ...extend as needed
#     }
#     for k, v in CURRENCY_MAP.items():
#         if k in text:
#             return v
#     return None

# def extract_number_and_label(text):
#     """Extracts number, label (suffix), and currency code from a text."""
#     if not text:
#         return None, None, None
#     text = text.replace('\xa0', ' ').replace(',', '.')
#     match = re.search(r'([\d\s]+(?:\.\d+)?)', text)
#     number = None
#     label = None
#     currency = None
#     if match:
#         number_str = match.group(1).replace(" ", "")
#         try:
#             number = float(number_str) if "." in number_str else int(number_str)
#         except Exception:
#             number = None
#         label = text[match.end():].strip()
#         # Fix: ALWAYS run the label through parse_currency (falling back to the full text)!
#         currency = parse_currency(label) or parse_currency(text)
#     return number, label, currency


# def extract_value_by_label(soup, label):
#     for div in soup.find_all("div", attrs={"data-sentry-element": "ItemGridContainer"}):
#         ps = div.find_all("p")
#         if len(ps) >= 2 and label in ps[0].text:
#             return ps[1].text.strip()
#     return None

# def _check_inactive(page, soup, inactive_rules: list) -> bool:
#     for rule in inactive_rules:
#         rule_type = rule.get("type")
#         if rule_type == "text_contains":
#             text = rule.get("text", "").lower()
#             if text in page.html.lower():
#                 return True
#         elif rule_type == "selector_text":
#             selector = rule.get("selector")
#             expected_text = rule.get("text", "").lower()
#             element = soup.select_one(selector)
#             if element and element.get_text(strip=True).lower() == expected_text:
#                 return True
#         elif rule_type == "selector_contains":
#             selector = rule.get("selector")
#             expected_text = rule.get("text", "").lower()
#             element = soup.select_one(selector)
#             if element and expected_text in element.get_text(strip=True).lower():
#                 return True
#         elif rule_type == "selector_missing":
#             selector = rule.get("selector")
#             if not soup.select_one(selector):
#                 return True
#         elif rule_type == "source_field_match":
#             field = rule.get("field")
#             match = rule.get("match", "").lower()
#             value = getattr(page, field, "")
#             if isinstance(value, dict):
#                 value = json.dumps(value)
#             if value and match in str(value).lower():
#                 return True
            
#     return False





# def extract_image_links(soup, selector=None):
#     """
#     Returns a list of unique image links (src from <img>, srcset from <source>) found within the given selector, or within the whole page by default.
#     """
#     urls = set()
#     context = soup.select(selector) if selector else [soup]
#     for scope in context:
#         # All <img> elements
#         for img in scope.find_all("img"):
#             src = img.get("src")
#             if src and src.startswith("http"):
#                 urls.add(src)
#         # All <source> elements (e.g. inside <picture>)
#         for src in scope.find_all("source"):
#             # Handle srcset (may contain several comma-separated entries)
#             srcset = src.get("srcset")
#             if srcset:
#                 for url in srcset.split(","):
#                     url = url.strip().split(" ")[0]
#                     if url and url.startswith("http"):
#                         urls.add(url)
#     return list(urls)






# def parse_manual_data(page: NetworkMonitoredPage, strict=True) -> bool:
#     source = page.source

#     try:
#         manual_config: ManualDataSource = source.manual_data_source_fetcher
#     except ManualDataSource.DoesNotExist:
#         NetworkSourceError.objects.create(
#             source=source,
#             error_message="Brak ManualDataSource dla tego źródła.",
#             error_type="MissingManualDataSource"
#         )
#         print(f"[ERROR] ManualDataSource does not exist for source {source}")
#         return False

#     try:
#         print("="*80)
#         print(f"[INFO] Parsing page: {page.url}")
#         print(f"[INFO] First 500 characters of HTML:\n{page.html[:500]}\n")

#         soup = BeautifulSoup(page.html, 'html.parser')
#         extracted = {}
#         main_values = {}

#         # 1. Detect type (rules)
#         selected_type = None
#         rules = manual_config.rules or []
#         for rule in rules:
#             if "selector" in rule:
#                 element = soup.select_one(rule["selector"])
#                 if element:
#                     content = element.get_text(strip=True).lower()
#                     match_values = rule.get("match", [])
#                     if isinstance(match_values, str):
#                         match_values = [match_values]
#                     if any(val.lower() in content for val in match_values):
#                         selected_type = rule["type"]
#                         print(f"[DEBUG] Rule matched by selector: {rule['selector']}, type: {selected_type}")
#                         break
#             elif "source" in rule:
#                 value = getattr(page, rule["source"], "")
#                 if isinstance(value, dict):
#                     value = json.dumps(value)
#                 elif value is None:
#                     continue
#                 value = str(value).lower()
#                 match_values = rule.get("match", [])
#                 if isinstance(match_values, str):
#                     match_values = [match_values]
#                 if any(val.lower() in value for val in match_values):
#                     selected_type = rule["type"]
#                     print(f"[DEBUG] Rule matched by source: {rule['source']}, type: {selected_type}")
#                     break

#         all_selectors = manual_config.selectors
#         if isinstance(all_selectors, dict) and selected_type and selected_type in all_selectors:
#             selectors = all_selectors[selected_type]
#         else:
#             selectors = all_selectors  # fallback

#         print(f"[INFO] Using selectors (type: {selected_type or 'default'}): {list(selectors.keys())}")

#         # 2. Parse all isMain fields first to fill main_values
#         for field_name, config in selectors.items():
#             if config.get("isMain"):
#                 field_type = config.get("fieldType", "text")
#                 text_value = None
#                 print(f"\n[FIELD] (isMain) Parsing field: {field_name}, fieldType: {field_type}")

#                 # Handle number and bool in labelPair
#                 if field_type == "labelPair":
#                     label = config.get("label")
#                     raw = extract_value_by_label(soup, label)
#                     # Decide which valueType we want: text, number, or bool
#                     value_type = config.get("valueType", "text")
#                     if value_type == "number":
#                         number, label_val, currency = extract_number_and_label(raw)
#                         if config.get("currencyField") and currency:
#                             extracted[config["currencyField"]] = currency
#                         if config.get("labelField") and label_val:
#                             extracted[config["labelField"]] = label_val
#                         text_value = number
#                         print(f"[DEBUG] labelPair-number: label={label}, value={text_value}, currency={currency}")
#                     elif value_type == "bool":
#                         val = (raw or "").strip().lower()
#                         true_opts = config.get("trueOptions", ["tak", "yes", "true", "1"])
#                         false_opts = config.get("falseOptions", ["nie", "no", "false", "0"])
#                         if any(opt == val for opt in true_opts):
#                             text_value = True
#                         elif any(opt == val for opt in false_opts):
#                             text_value = False
#                         else:
#                             text_value = _resolve_missing_bool(config)
#                         print(f"[DEBUG] labelPair-bool: label={label}, value={text_value}")
#                     else:
#                         text_value = raw
#                         print(f"[DEBUG] labelPair-text: label={label}, value={text_value}")

#                 elif field_type == "number":
#                     selector = config.get("selector")
#                     element = soup.select_one(selector)
#                     raw = element.get_text(strip=True) if element else _resolve_missing_text(config)
#                     number, label_val, currency = extract_number_and_label(raw)
#                     if config.get("currencyField") and currency:
#                         extracted[config["currencyField"]] = currency
#                     if config.get("labelField") and label_val:
#                         extracted[config["labelField"]] = label_val
#                     text_value = number
#                     print(f"[DEBUG] number: selector={selector}, value={text_value}, currency={currency}")

#                 elif field_type == "bool":
#                     selector = config.get("selector")
#                     element = soup.select_one(selector)
#                     raw = element.get_text(strip=True) if element else _resolve_missing_text(config)
#                     val = (raw or "").strip().lower()
#                     true_opts = config.get("trueOptions", ["tak", "yes", "true", "1"])
#                     false_opts = config.get("falseOptions", ["nie", "no", "false", "0"])
#                     if any(opt == val for opt in true_opts):
#                         text_value = True
#                     elif any(opt == val for opt in false_opts):
#                         text_value = False
#                     else:
#                         text_value = _resolve_missing_bool(config)
#                     print(f"[DEBUG] bool: selector={selector}, value={text_value}")

#                 elif field_type == "text":
#                     selector = config.get("selector")
#                     element = soup.select_one(selector)
#                     if element:
#                         if config.get("paragraphs", False):
#                             text_value = element.get_text(separator="\n", strip=True)
#                         else:
#                             text_value = element.get_text(strip=True)
#                     else:
#                         text_value = _resolve_missing_text(config)
#                     print(f"[DEBUG] text: selector={selector}, value={text_value}")


#                 main_values[field_name] = text_value
#                 extracted[field_name] = text_value

#         # 3. Now parse all other fields
#         for field_name, config in selectors.items():
#             if config.get("isMain"):
#                 continue  # already handled
            



#             field_type = config.get("fieldType", "text")
#             text_value = None
#             print(f"\n[FIELD] Parsing field: {field_name}, fieldType: {field_type}")
            
#             if field_type == "images":
#                 selector = config.get("selector")
#                 text_value = extract_image_links(soup, selector)
#                 print(f"[DEBUG] images: selector={selector}, found={len(text_value)} links")
#                 extracted[field_name] = text_value   # <-- REQUIRED!
#                 continue

#             if field_type == "labelPair":
#                 label = config.get("label")
#                 raw = extract_value_by_label(soup, label)
#                 value_type = config.get("valueType", "text")
#                 if value_type == "number":
#                     number, label_val, currency = extract_number_and_label(raw)
#                     if config.get("currencyField") and currency:
#                         extracted[config["currencyField"]] = currency
#                     if config.get("labelField") and label_val:
#                         extracted[config["labelField"]] = label_val
#                     text_value = number
#                     print(f"[DEBUG] labelPair-number: label={label}, value={text_value}, currency={currency}")
#                 elif value_type == "bool":
#                     val = (raw or "").strip().lower()
#                     true_opts = config.get("trueOptions", ["tak", "yes", "true", "1"])
#                     false_opts = config.get("falseOptions", ["nie", "no", "false", "0"])
#                     if any(opt == val for opt in true_opts):
#                         text_value = True
#                     elif any(opt == val for opt in false_opts):
#                         text_value = False
#                     else:
#                         text_value = _resolve_missing_bool(config)
#                     print(f"[DEBUG] labelPair-bool: label={label}, value={text_value}")
#                 else:
#                     text_value = raw
#                     print(f"[DEBUG] labelPair-text: label={label}, value={text_value}")

#             elif field_type == "number":
#                 selector = config.get("selector")
#                 element = soup.select_one(selector)
#                 raw = element.get_text(strip=True) if element else _resolve_missing_text(config)
#                 number, label_val, currency = extract_number_and_label(raw)
#                 if config.get("currencyField") and currency:
#                     extracted[config["currencyField"]] = currency
#                 if config.get("labelField") and label_val:
#                     extracted[config["labelField"]] = label_val
#                 text_value = number
#                 print(f"[DEBUG] number: selector={selector}, value={text_value}, currency={currency}")

#             elif field_type == "bool":
#                 selector = config.get("selector")
#                 element = soup.select_one(selector)
#                 raw = element.get_text(strip=True) if element else _resolve_missing_text(config)
#                 val = (raw or "").strip().lower()
#                 true_opts = config.get("trueOptions", ["tak", "yes", "true", "1"])
#                 false_opts = config.get("falseOptions", ["nie", "no", "false", "0"])
#                 if any(opt == val for opt in true_opts):
#                     text_value = True
#                 elif any(opt == val for opt in false_opts):
#                     text_value = False
#                 else:
#                     text_value = _resolve_missing_bool(config)
#                 print(f"[DEBUG] bool: selector={selector}, value={text_value}")

#             elif "fromMain" in config:
#                 base_field = config.get("fromMain")
#                 base_value = main_values.get(base_field)
#                 print(f"[DEBUG] fromMain: base_field={base_field}, value={base_value}")
#                 split_by = config.get("splitBy") or selectors.get(base_field, {}).get("splitBy", ",")
#                 split_index = config.get("splitIndex")
#                 if base_value and split_index is not None:
#                     parts = [p.strip() for p in base_value.split(split_by)]
#                     if 0 <= split_index < len(parts):
#                         text_value = parts[split_index]
#                         print(f"[DEBUG] fromMain split: split_by={split_by}, split_index={split_index}, value={text_value}")
#                     else:
#                         text_value = None
#                         print(f"[DEBUG] fromMain split: split_by={split_by}, split_index={split_index}, value=None (out of range)")
#                 elif base_value:
#                     text_value = base_value
#                 else:
#                     text_value = _resolve_missing_text(config)

#             elif field_type == "text":
#                 selector = config.get("selector")
#                 element = soup.select_one(selector)
#                 if element:
#                     if config.get("paragraphs", False):
#                         text_value = element.get_text(separator="\n", strip=True)
#                     else:
#                         text_value = element.get_text(strip=True)
#                 else:
#                     text_value = _resolve_missing_text(config)
#                 print(f"[DEBUG] text: selector={selector}, value={text_value}")


#             elif field_type == "boolKey":
#                 selector = config.get("selector")
#                 element = soup.select_one(selector)
#                 if element:
#                     content = element.get_text(strip=True).lower()
#                     if any(opt.lower() in content for opt in config.get("trueOptions", [])):
#                         text_value = True
#                     elif any(opt.lower() in content for opt in config.get("falseOptions", [])):
#                         text_value = False
#                     else:
#                         text_value = _resolve_missing_bool(config)
#                     print(f"[DEBUG] boolKey: selector={selector}, value={text_value}")
#                 else:
#                     text_value = _resolve_missing_bool(config)
#                     print(f"[DEBUG] boolKey: selector={selector}, value=None (missing, fallback={text_value})")

#             elif field_type == "key":
#                 selector = config.get("selector")
#                 element = soup.select_one(selector)
#                 if element:
#                     content = element.get_text(strip=True).lower()
#                     key_map = config.get("keyMap", {})
#                     matched = None
#                     for final_value, variants in key_map.items():
#                         if any(variant.lower() in content for variant in variants):
#                             matched = final_value
#                             break
#                     text_value = matched
#                     print(f"[DEBUG] key: selector={selector}, value={text_value}")
#                 else:
#                     text_value = _resolve_missing_text(config)
#                     print(f"[DEBUG] key: selector={selector}, value=None (missing, fallback={text_value})")

#             elif field_type == "many":
#                 selectors_list = config.get("selectors", [])
#                 combined = ""
#                 for sel in selectors_list:
#                     element = soup.select_one(sel)
#                     if element:
#                         combined += " " + element.get_text(separator=" ", strip=True)
#                 for clean_item in config.get("cleanOptions", []):
#                     combined = combined.replace(clean_item, "")
#                 combined = combined.strip()
#                 text_value = combined if combined else _resolve_missing_text(config)
#                 print(f"[DEBUG] many: selectors={selectors_list}, value={text_value}")

#             if strict and text_value is None:
#                 print(f"[WARNING] Value not found for field: {field_name} (config: {config})")
#                 raise ValueError(f"Value not found: {field_name} -> {config}")

#             extracted[field_name] = text_value

#         print(f"\n[INFO] Extracted fields for {page.url}:")
#         for k, v in extracted.items():
#             print(f"    {k}: {v}")

#         # 4. Transfer fields from NetworkMonitoredPage if defined
#         for key, source_field in (getattr(manual_config, "trasferred", {}) or {}).items():
#             if hasattr(page, source_field):
#                 extracted[key] = getattr(page, source_field)

#         # 5. Inactive check
#         inactive_rules = manual_config.inactive or []
#         if _check_inactive(page, soup, inactive_rules):
#             page.is_active = False
#             page.inactive_date = now()

#         # 6. Statistics
#         total_fields = len(selectors)
#         found_fields = sum(1 for val in extracted.values() if val not in [None, "", [], {}])
#         extracted["_stats"] = {
#             "found_fields": found_fields,
#             "total_fields": total_fields,
#             "found_keys": [k for k, v in extracted.items() if k != "_stats" and v not in [None, "", [], {}]],
#             "missing_keys": [k for k, v in extracted.items() if k != "_stats" and v in [None, "", [], {}]],
#             "type_used": selected_type or "default"
#         }
#         print(f"[INFO] Stats: {extracted['_stats']}")

#         # 7. Save to DB
#         page.raw_data = json.dumps(extracted, ensure_ascii=False)
#         page.parse_data = extracted
#         page.is_complete = True
#         page.save()

#         print(f"[SUCCESS] Data parsed and saved for page: {page.url}\n{'='*80}")

#         return True

#     except Exception:
#         print(f"[EXCEPTION] Error while parsing page: {page.url}\n{traceback.format_exc()}")
#         NetworkSourceError.objects.create(
#             source=source,
#             error_message=traceback.format_exc(),
#             error_type="ManualParsingError"
#         )
#         send_alert_notification(
#             f"Błąd parsowania ogłoszenia {page.url} ({source.title})\n\n{traceback.format_exc()}",
#             subject=f"❌ Błąd parsowania: {source.title}",
#         )
#         return False
