import time, re, json
from fb_common import session_facebook

# Load the collected links. The context manager closes the file promptly and
# the explicit encoding matches the UTF-8 used for the HTML dump further down.
with open("/home/collectifweil/suivi_data/post_links.json", encoding="utf-8") as fh:
    links = json.load(fh)

# Keep only the page's bare permalinks, deduplicated in first-seen order
# (dict.fromkeys preserves insertion order while dropping repeats).
permalink_re = re.compile(
    r'(https://www\.facebook\.com/collectif\.de\.citoyens\.ucclois/posts/pfbid[0-9A-Za-z]+)'
)
matches = (permalink_re.search(link) for link in links)
posts = list(dict.fromkeys(m.group(1) for m in matches if m))
print("posts nus:", len(posts))

# Without this guard an empty result surfaces as a bare IndexError on
# posts[0]; stop with an explicit message instead.
if not posts:
    raise SystemExit("aucun permalien de post trouve dans post_links.json")
URL = posts[0]
print("INSPECTION:", URL)

# Open a session through the fb_common helper and load the post to inspect.
# NOTE(review): `d` is used below like a Selenium WebDriver (find_elements,
# execute_script, page_source, quit) -- confirm against fb_common.
d = session_facebook()
try:
    d.get(URL)
except Exception:
    # Navigation failed: shut the session down before propagating, because the
    # d.quit() at the end of the script would never be reached.
    d.quit()
    raise
# Fixed pause after navigation, presumably to let the page finish rendering.
time.sleep(6)

# Try to open all comments: click the sort filter
from selenium.webdriver.common.by import By
def _xpath_literal(s):
    """Return *s* quoted as an XPath 1.0 string literal.

    XPath 1.0 string literals have no escape character, so a value holding
    both quote kinds has to be assembled with concat().
    """
    if "'" not in s:
        return "'%s'" % s
    if '"' not in s:
        return '"%s"' % s
    pieces = ("'%s'" % part for part in s.split("'"))
    return "concat(" + ", \"'\", ".join(pieces) + ")"


def click_text(txts):
    """Click the first element matching one of the labels in *txts*.

    Labels are tried in order. An element matches when its first text node,
    after whitespace normalisation, equals the label. The first match is
    clicked through JavaScript on the module-level driver ``d``, followed by
    a 2-second pause.

    Args:
        txts: iterable of label strings to try, in priority order.

    Returns:
        The label that was clicked, or None if no label could be clicked.
    """
    for t in txts:
        try:
            # Quote the label properly: labels with an apostrophe would
            # otherwise yield an invalid XPath and be skipped silently.
            xpath = "//*[normalize-space(text())=%s]" % _xpath_literal(t)
            els = d.find_elements(By.XPATH, xpath)
            if els:
                d.execute_script("arguments[0].click();", els[0])
                time.sleep(2)
                return t
        except Exception:
            # Best-effort by design: a failure on one label must not prevent
            # the remaining labels from being tried.
            continue
    return None

# Expand "see more comments" links and reply threads, for at most 8 passes.
def _js_click_each(elements, pause):
    """JS-click every element, pausing after each success.

    Returns True if at least one click went through; failures are ignored.
    """
    any_clicked = False
    for element in elements:
        try:
            d.execute_script("arguments[0].click();", element)
            any_clicked = True
            time.sleep(pause)
        except Exception:
            continue
    return any_clicked


for _attempt in range(8):
    progressed = False

    # At most three "more comments" spans are clicked per label and per pass.
    for label in ("Voir plus de commentaires",
                  "Afficher plus de commentaires",
                  "Plus de commentaires"):
        more_spans = d.find_elements(By.XPATH, "//span[contains(text(),'%s')]" % label)
        if _js_click_each(more_spans[:3], 1.5):
            progressed = True

    # At most eight reply-thread spans are clicked per pass.
    reply_spans = d.find_elements(By.XPATH, "//span[contains(text(),'réponse')]")
    if _js_click_each(reply_spans[:8], 0.6):
        progressed = True

    # Scroll to the bottom of the page, then wait before the next pass.
    d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1.5)

    # Stop as soon as a full pass clicked nothing.
    if not progressed:
        break

# Snapshot the current page source and save it for offline inspection.
h = d.page_source
# The context manager guarantees the file is flushed and closed, instead of
# relying on garbage collection of an anonymous file object.
with open("/home/collectifweil/suivi_data/post0.html", "w", encoding="utf-8") as out:
    out.write(h)

# Inventory of the comment-related aria-labels present in the page source.
from collections import Counter

labels = re.findall(
    r'aria-label="((?:Commentaire|Réponse|Comment|Reply)[^"]{0,60})"',
    h,
)
label_counts = Counter(labels)
# Print the first eight distinct labels with their counts, then the total
# number of matches.
sample = list(label_counts.items())[:8]
print("aria-labels (echantillon):", sample, "TOTAL", len(labels))

# Locate comment and reply elements by aria-label prefix and print a short
# sample. The finally clause makes sure the session is closed even if the
# lookup or the printing fails.
try:
    arts = d.find_elements(By.CSS_SELECTOR, '[aria-label^="Commentaire de"], [aria-label^="Réponse de"]')
    print("articles commentaire/reponse:", len(arts))
    # Sample: first two elements only.
    for a in arts[:2]:
        try:
            al = a.get_attribute("aria-label")
            txt = a.text.replace("\n", " | ")[:200]
            # Slice before reading attributes so only the three elements that
            # are actually shown are queried.
            link_els = a.find_elements(By.CSS_SELECTOR, "a[href*='facebook.com']")[:3]
            prof = [x.get_attribute("href") for x in link_els]
            img_els = a.find_elements(By.CSS_SELECTOR, "img")[:3]
            imgs = [x.get_attribute("src") for x in img_els]
            print("---")
            print("LABEL:", al)
            print("TXT:", txt)
            print("LIENS:", prof)
            # src may be None; truncate long URLs to 60 characters.
            print("IMGS:", [(i or '')[:60] for i in imgs])
        except Exception as e:
            # One broken element must not abort the sample.
            print("err", e)
finally:
    d.quit()