from flask import Flask, render_template,request
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
import requests
from collections import OrderedDict
import os
from dotenv import load_dotenv
import urllib.parse
from urllib.parse import urlparse
import re
from datetime import datetime
import threading
from sqlalchemy import func, and_
from openai import OpenAI
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from requests.exceptions import Timeout, SSLError, RequestException
# Create the Flask application object used by the rest of this module.
app = Flask(__name__)


# Database connection: MySQL on localhost through the PyMySQL driver,
# user "root" with an empty password, schema "dre".
# NOTE(review): the credentials are hard-coded; presumably this targets a
# local Laragon development setup — confirm before deploying elsewhere.
# Previous URI, kept for reference:
# app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:@localhost/news_dahir'
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+pymysql://root:@localhost/dre'
# Disable Flask-SQLAlchemy's modification-tracking setting.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
def safe_get(url, headers=None, timeout=10):
    """
    Fetch ``url`` with an HTTP GET without raising on request errors.

    Args:
        url: Address to request.
        headers: Optional mapping of HTTP headers sent with the request.
        timeout: Timeout value handed to ``requests.get`` (10 by default).

    Returns:
        The response object when the request succeeds with a non-error
        status, or ``None`` when a ``RequestException`` occurs (the error
        is printed).
    """
    try:
        reply = requests.get(url, headers=headers, timeout=timeout)
        # Error statuses are turned into exceptions so they share the
        # failure path below.
        reply.raise_for_status()
    except requests.exceptions.RequestException as err:
        print(f"[❌] Error fetching {url}: {err}")
        return None
    else:
        return reply
# Bind the SQLAlchemy extension to the app and register Flask-Migrate on
# top of it for schema migrations.
db = SQLAlchemy(app)
migrate = Migrate(app, db)

# Define the Article model
class ScrapedArticle(db.Model):
    """Database row describing one scraped news article.

    Besides the scraped fields (title, description, link, ...), the row
    carries layout hints (``priority``, ``section``) and three AI columns
    (``ai_summary``, ``ai_tags``, ``ai_category``) that the constructor
    leaves unset.
    """

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    description = db.Column(db.Text)
    publish_date = db.Column(db.String(50))
    link = db.Column(db.String(500))
    author = db.Column(db.String(100), default='No author provided')
    logo = db.Column(db.String(255), default=None)
    logo1 = db.Column(db.String(255), default=None)
    website = db.Column(db.String(255))
    image_url = db.Column(db.String(500), nullable=True)
    badge = db.Column(db.String(255), nullable=True)
    website_url = db.Column(db.String(500), nullable=True)
    category = db.Column(db.String(255))

    priority = db.Column(db.Integer, default=10)
    section = db.Column(db.String(50))
    ai_summary = db.Column(db.Text)
    ai_tags = db.Column(db.Text)
    ai_category = db.Column(db.String(100))

    def __init__(self, title, description, publish_date, link, author, logo, logo1, website,
        image_url=None, badge=None, website_url=None, category=None, priority=10, section=None):
        """Populate the scraped fields.

        When ``category`` is ``None`` it is derived from the title and
        description through ``determine_category``.
        """
        self.title = title
        self.description = description
        self.publish_date = publish_date
        self.link = link
        self.author = author
        self.logo = logo
        self.logo1 = logo1
        self.website = website
        self.image_url = image_url
        self.badge = badge
        self.website_url = website_url
        self.category = (
            self.determine_category(title, description)
            if category is None
            else category
        )
        self.priority = priority
        self.section = section

    def determine_category(self, title, description):
        """Return the first category with a keyword found in the text.

        Matching is a case-insensitive substring test against the title
        and the description; ``None`` or empty values count as empty text.
        Returns ``"Trending"`` when no keyword matches.
        """
        keyword_map = {
            # An empty list can never match, so this label is never returned.
            "crypto": [],
            "technology news": [
                "technology", "tech", "gadgets", "software", "hardware",
                "AI", "artificial intelligence", "machine learning", "deep learning",
                "fintech", "financial", "banking", "payments", "money",
                "startup", "startups", "entrepreneurship", "venture", "founder",
                "crypto", "cryptocurrency", "blockchain", "bitcoin", "ethereum",
            ],
        }

        title_text = (title or '').lower()
        body_text = (description or '').lower()

        for label, words in keyword_map.items():
            for word in words:
                needle = word.lower()
                if needle in title_text or needle in body_text:
                    return label
        return "Trending"

# new1

# =========================
# 🤖 AI FUNCTIONS
# =========================

def generate_ai_summary(title, description):
    """Ask the chat model for a 30-word summary of an article.

    Args:
        title: Article headline, placed on the second line of the prompt.
        description: Article text, placed on the third line of the prompt.

    Returns:
        The stripped model reply, or ``"Summary unavailable"`` when any
        exception occurs (the error is printed).
    """
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            max_tokens=80,
            messages=[
                {
                    "role": "user",
                    "content": f"Summarize in 30 words:\n{title}\n{description}",
                }
            ],
        )
        reply_text = completion.choices[0].message.content
        return reply_text.strip()
    except Exception as err:
        print(f"[❌ AI error]: {err}")
        return "Summary unavailable"


def generate_ai_tags(title, description):
    """Ask the chat model for 3-5 tags describing an article.

    Args:
        title: Article headline; the only text sent to the model.
        description: Accepted for signature parity but not used in the
            prompt.

    Returns:
        The stripped model reply, or ``None`` when any exception occurs
        (the error is printed).
    """
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            max_tokens=30,
            messages=[
                {
                    "role": "user",
                    "content": f"Give 3-5 tags:\n{title}",
                }
            ],
        )
        reply_text = completion.choices[0].message.content
        return reply_text.strip()
    except Exception as err:
        print(f"[❌ TAG ERROR]: {err}")
        return None


def categorize_article(title, description):
    """Ask the chat model to pick one category for an article title.

    NOTE: this module defines a second ``categorize_article`` further
    down; because it comes later, that definition is the one bound to the
    name once the module has been imported.

    Args:
        title: Article headline; the only text sent to the model.
        description: Accepted for signature parity but not used in the
            prompt.

    Returns:
        The stripped model reply, or ``"Other"`` when the call fails.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{
                "role": "user",
                "content": f"Categorize into AI, Fintech, Startups, Cybersecurity, Telecom, Policy, Other:\n{title}"
            }],
            max_tokens=10
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit
        # and hide the reason for the failure; report it like the sibling
        # AI helpers do.
        print(f"[❌ CATEGORY ERROR]: {e}")
        return "Other"
    
# NEW2
def update_ai(limit=10):
    """Generate and store AI summaries for a batch of articles.

    Loads up to ``limit`` articles whose description is not NULL, stores
    the result of ``generate_ai_summary`` in ``ai_summary`` for each one
    and commits once at the end.

    Args:
        limit: Maximum number of articles handled by this call.

    NOTE(review): the query has no ordering and does not exclude rows that
    already carry a summary, so repeated calls may re-process the same
    rows — confirm whether that is intended.
    """
    articles = ScrapedArticle.query.filter(
        ScrapedArticle.description != None  # noqa: E711 - column comparison, not a Python identity test
    ).limit(limit).all()

    for article in articles:
        article.ai_summary = generate_ai_summary(article.title, article.description)
        # ``title`` is a nullable column; slicing None would raise and
        # abort the whole batch before the commit.
        print(f"🤖 Summary: {(article.title or '')[:40]}")

    db.session.commit()


def update_ai_tags(limit=10):
    """Generate and store AI tags for a batch of articles.

    Loads up to ``limit`` articles whose description is not NULL, stores
    the result of ``generate_ai_tags`` in ``ai_tags`` for each one and
    commits once at the end.

    Args:
        limit: Maximum number of articles handled by this call.

    NOTE(review): the query has no ordering and does not exclude rows that
    already carry tags, so repeated calls may re-process the same rows —
    confirm whether that is intended.
    """
    articles = ScrapedArticle.query.filter(
        ScrapedArticle.description != None  # noqa: E711 - column comparison, not a Python identity test
    ).limit(limit).all()

    for article in articles:
        article.ai_tags = generate_ai_tags(article.title, article.description)
        # ``title`` is a nullable column; slicing None would raise and
        # abort the whole batch before the commit.
        print(f"🏷 Tags: {(article.title or '')[:40]}")

    db.session.commit()


def update_ai_category(limit=10):
    """Re-categorize a batch of articles with the AI classifier.

    Loads up to ``limit`` articles whose description is not NULL, stores
    the result of ``categorize_article`` in ``category`` for each one and
    commits once at the end.

    Args:
        limit: Maximum number of articles handled by this call.

    NOTE(review): the result overwrites ``category``; the model also has
    an ``ai_category`` column that this function does not touch — confirm
    which column is meant to receive the AI label.
    """
    articles = ScrapedArticle.query.filter(
        ScrapedArticle.description != None  # noqa: E711 - column comparison, not a Python identity test
    ).limit(limit).all()

    for article in articles:
        article.category = categorize_article(article.title, article.description)
        # ``title`` is a nullable column; slicing None would raise and
        # abort the whole batch before the commit.
        print(f"📂 Category: {(article.title or '')[:40]}")

    db.session.commit()

    # NEW3

def run_ai_background():
    """Run the three AI update passes inside a Flask application context.

    Order: summaries, then tags, then categories, each limited to 10
    articles.
    """
    ai_passes = (update_ai, update_ai_tags, update_ai_category)
    with app.app_context():
        print("🤖 Running AI...")
        for ai_pass in ai_passes:
            ai_pass(limit=10)
def determine_category(title, description):
    """Return the category label for a scraped article.

    The title and description are searched, case-insensitively and by
    substring, for each keyword of each category; the first category with
    a hit wins.

    Args:
        title: Article headline, or ``None``.
        description: Article text, or ``None``.

    Returns:
        The matching category name. With the current single-category
        table and the identical fallback, this is always
        ``"technology news"``.
    """
    keywords = {
        "technology news": [
            "technology", "tech", "gadgets", "software", "hardware",
            "energy", "renewable", "solar", "wind", "electricity", "power",
            "oil", "gas", "lng",
            "cybersecurity", "data security", "network security",
            "information security", "encryption",
            # These two were fused into "hackingAI" by a missing comma.
            "hacking", "AI",
            "artificial intelligence", "machine learning", "deep learning",
            "fintech", "financial", "banking", "payments", "money",
            # NOTE(review): trailing space kept as in the original table —
            # confirm whether it is deliberate.
            "forex ",
            "startup", "startups", "entrepreneurship", "venture", "founder",
            "crypto", "cryptocurrency", "blockchain", "bitcoin", "ethereum",
        ],
    }

    # Lower-case both texts once instead of once per keyword.
    title_text = ("" if title is None else title).lower()
    description_text = ("" if description is None else description).lower()

    for category, keyword_list in keywords.items():
        for keyword in keyword_list:
            needle = keyword.lower()
            if needle in title_text or needle in description_text:
                return category
    return "technology news"
# Load variables from a .env file into the process environment before the
# API key is read below.
load_dotenv()
# Shared OpenAI client used by the AI helper functions in this module; the
# key comes from the OPENAI_API_KEY environment variable rather than being
# hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def categorize_article(title, summary):
    """Classify a tech news article into a single category via the chat model.

    Args:
        title: Article headline inserted into the prompt.
        summary: Article summary or description inserted into the prompt.

    Returns:
        The stripped model reply (the prompt asks for one of AI, Fintech,
        Startups, Cybersecurity, Telecom, Policy, Other), or ``"Other"``
        when any exception occurs (the error is printed).
    """
    try:
        request_text = f"""
        Categorize this tech news article into ONE category:

        AI, Fintech, Startups, Cybersecurity, Telecom, Policy, Other

        Title: {title}
        Summary: {summary}

        Return ONLY the category name.
        """

        completion = client.chat.completions.create(
            model="gpt-4.1-mini",
            temperature=0,
            messages=[{"role": "user", "content": request_text}],
        )
        label = completion.choices[0].message.content
        return label.strip()
    except Exception as err:
        print("AI Error:", err)
        return "Other"
    
def fetch_news_disrupt():
    """Scrape the newest East Africa article listed on Disrupt Africa.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``description``, ``publish_date``, ``link``, ``author``, ``logo``,
        ``logo1``, ``website``, ``image_url``, ``badge``, ``website_url``,
        ``category``, ``priority`` and ``section``. The list is empty when
        the page cannot be fetched or contains no matching article element.
    """
    url = 'https://disruptafrica.com/category/region/east-africa/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    website = 'Disrupt Africa'

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Disrupt Africa request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('article', class_='l-post grid-post grid-base-post')[:1]

    for article in article_collection:
        badge_element = article.find('a', class_='article-list-category')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h2', class_='is-title post-title')
        link_element = title_element.find('a') if title_element else None
        title = link_element.text.strip() if link_element else None
        link = link_element.get('href') if link_element else None

        author_element = article.find('span', class_='reviewer')
        author = author_element.text.strip() if author_element else ''

        date_element = article.find('div', class_='post-date')
        publish_date = date_element.text.strip() if date_element else 'Recently Added'

        # The image URL lives in a data attribute of the first <span>;
        # .get() avoids a KeyError when that span lacks the attribute.
        span_element = article.find('span')
        image_url = span_element.get('data-bgsrc') if span_element else None

        excerpt_element = article.find('div', class_='excerpt')
        description = excerpt_element.text.strip() if excerpt_element else None

        articles.append({
            'title': title,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            # Fall back to the site name when no author is listed.
            'author': author or website,
            'logo': 'https://uhm.kzo.mybluehost.me/wp-content/uploads/2023/07/DA-logo-on-transparent-with-tag-small-e1414593297539-1.png',
            'logo1': 'https://flagpedia.net/data/flags/h80/ke.webp',
            'website': website,
            'image_url': image_url,
            'badge': badge,
            'website_url': url,
            'category': determine_category(title, description),
            'priority': 1,
            'section': 'top'
        })

    return articles


# Scrape the latest article from the TechCabal search page (the function name still says "voa_central")
def fetch_news_voa_central():
    """Scrape the first article listed on the TechCabal search page.

    Despite its name, this function requests ``https://techcabal.com/search``
    and labels the result with the website name ``Techcabal``.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``, ``logo``,
        ``logo1``, ``image_url``, ``author``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``. The list is empty when
        the page cannot be fetched or contains no matching article element.
    """
    url = 'https://techcabal.com/search'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Techcabal request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('article', class_='article-list-item')[:1]

    for article in article_collection:
        badge_element = article.find('a', class_='article-list-category')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('a', class_='article-list-title')
        title = title_element.text.strip() if title_element else ''

        # A missing date span used to raise AttributeError; use the same
        # placeholder as the other scrapers instead.
        date_element = article.find('span', class_='article-list-date')
        if date_element:
            publish_date = date_element.text.strip().replace('\n', '')
        else:
            publish_date = 'Recently Added'

        author_element = article.find('span', class_='article-list-author')
        author = author_element.text.strip() if author_element else None

        excerpt_element = article.find('div', class_='excerpt')
        description = excerpt_element.text.strip() if excerpt_element else None

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        # Only anchors that actually carry an href can provide the link.
        link_element = article.find('a', href=True)
        link = link_element['href'] if link_element else ''

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'logo': 'https://c76c7bbc41.mjedge.net/wp-content/themes/tc2.0/images/tclogo.png',
            'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
            'image_url': image_url,
            'author': author,
            'website': 'Techcabal',
            'website_url': url,
            'category': determine_category(title, description),
            'priority': 2,
            'section': 'side'
        })

    return articles


import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def fetch_news_central():
    """Scrape the first article listed on TechCentral's sections page.

    Each article is parsed inside its own ``try`` so that one malformed
    entry is reported and skipped. A failed page request is reported the
    same way and yields an empty list.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``publish_date``, ``link``, ``author``, ``image_url``, ``logo``,
        ``logo1``, ``description``, ``website``, ``badge``, ``website_url``,
        ``category``, ``priority`` and ``section``.
    """
    url = 'https://techcentral.co.za/category/sections/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
    }

    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        # Keep the scraper best-effort: a network failure means "no news".
        print(f"TechCentral request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []

    article_collection = soup.find_all(
        'article',
        class_='l-post grid-post grid-base-post'
    )[:1]

    for article in article_collection:

        try:
            title_element = article.find('h2', class_='is-title post-title')
            if not title_element:
                # An entry without a headline is not worth storing.
                continue
            title = title_element.get_text(strip=True)

            link_tag = article.find('a', href=True)
            link = link_tag['href'] if link_tag else ''

            author_element = article.find('span', class_='meta-item post-author')
            author = author_element.get_text(strip=True) if author_element else None

            date_element = article.find('span', class_='date-link')
            publish_date = date_element.get_text(strip=True) if date_element else None

            badge_element = article.find('a', class_='article-list-category')
            badge = badge_element.get_text(strip=True) if badge_element else ''

            desc_element = article.find('div', class_='excerpt')
            description = desc_element.get_text(strip=True) if desc_element else None

            image_url = ''

            # 1) Plain or lazily loaded <img> attributes.
            img = article.find('img')
            if img:
                image_url = (
                    img.get('src') or
                    img.get('data-src') or
                    img.get('data-lazy-src') or
                    img.get('data-bgsrc') or
                    ''
                )

            # 2) Background source stored on the first <span>.
            if not image_url:
                span = article.find('span')
                if span:
                    image_url = span.get('data-bgsrc', '')

            # 3) Inline CSS background-image on the first <a>.
            if not image_url:
                a_element = article.find('a')
                if a_element:
                    style_attr = a_element.get('style', '')
                    if 'background-image' in style_attr:
                        image_url = (
                            style_attr.split('url(')[-1]
                            .split(')')[0]
                            .replace('"', '')
                            .replace("'", '')
                        )

            # Resolve relative image paths against the page URL.
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            articles.append({
                'title': title,
                'publish_date': publish_date,
                'link': link,
                'author': author,
                'image_url': image_url,
                'logo': 'https://hub.techcentral.co.za/wp-content/uploads/2020/06/TechCentral-Logo.jpg',
                'logo1': 'https://flagpedia.net/data/flags/h80/za.webp',
                'description': description,
                'website': 'Tech Central',
                'badge': badge,
                'website_url': url,
                'category': determine_category(title, description),
                'priority': 3,
                'section': 'bottom'
            })

        except Exception as e:
            print(f"TechCentral scrape error: {e}")
            continue

    return articles
def fetch_news_techbusiness():
    """Scrape the first article from Business Tech Africa's technology page.

    The page is requested through a session configured with three retries
    (backoff factor 2) for the 500/502/503/504 statuses. Each article is
    parsed inside its own ``try`` so that one malformed entry is reported
    and skipped.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``image_url``,
        ``logo``, ``logo1``, ``link``, ``website``, ``website_url``,
        ``author``, ``category``, ``priority`` and ``section``. The list is
        empty when the request fails.
    """
    url = 'https://www.businesstechafrica.co.za/category/technology/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36'
    }

    retries = Retry(
        total=3,
        backoff_factor=2,
        status_forcelist=[500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retries)

    try:
        # The context manager closes the session (and its pooled
        # connections) once the page has been downloaded.
        with requests.Session() as session:
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            response = session.get(url, headers=headers, timeout=(10, 60))
            response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("BusinessTech request failed:", e)
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []

    article_collection = soup.find_all(
        'article', class_='l-post grid-post grid-base-post'
    )[:1]

    for article in article_collection:

        try:
            badge_element = article.find(
                'span',
                class_='cat-labels cat-labels-overlay c-overlay p-bot-left'
            )
            badge = badge_element.get_text(strip=True) if badge_element else ''

            title_element = article.find('h2', class_='is-title post-title')
            if not title_element:
                # An entry without a headline is not worth storing.
                continue
            title = title_element.get_text(strip=True)

            desc_element = article.find('div', class_='excerpt')
            description = desc_element.get_text(strip=True) if desc_element else ''

            date_element = article.find('time', class_='post-date')
            publish_date = date_element.get_text(strip=True) if date_element else 'Recently Added'

            image_url = ''

            # Prefer the lazy-load attribute over the plain src.
            img = article.find('img')
            if img:
                image_url = (
                    img.get('data-src') or
                    img.get('src') or
                    ''
                )

            # Otherwise try the background source on the first <span>.
            if not image_url:
                span = article.find('span')
                if span:
                    image_url = span.get('data-bgsrc', '')

            # Resolve relative image paths against the page URL.
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            link_tag = article.find('a', href=True)
            link = link_tag.get('href') if link_tag else ''

            author_element = article.find('span', class_='meta-item post-author')
            author = author_element.get_text(strip=True) if author_element else 'Business Tech Africa'

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'image_url': image_url,
                'logo': 'https://www.businesstechafrica.co.za/wp-content/uploads/2023/03/logo-1.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/za.webp',
                'link': link,
                'website': 'Business Tech Africa',
                'website_url': url,
                'author': author,
                'category': determine_category(title, description),
                'priority': 4,
                'section': 'top'
            })

        except Exception as e:
            print("BusinessTech scrape error:", e)
            continue

    return articles


def fetch_news_techpoint():
    """Scrape the first dynamic-list entry from the Techpoint Africa home page.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``, ``logo``,
        ``logo1``, ``image_url``, ``author``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``. The list is empty when
        the page cannot be fetched or contains no matching element.
    """
    url = 'https://techpoint.africa/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    placeholder_image = 'https://default-image-url.com/placeholder.jpg'

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Techpoint request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('div', class_='oxy-dynamic-list')[:1]

    for article in article_collection:
        badge_element = article.find('div', class_='ct-code-block article-category-link')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('span', class_='ct-span')
        title = title_element.text.strip() if title_element else ''

        # Only anchors that actually carry an href can provide the link.
        link_element = article.find('a', href=True)
        link = link_element['href'] if link_element else ''

        publish = article.find('div', class_='ct-code-block')
        publish_date = publish.text.strip() if publish else ''

        image_element = article.find('img')
        if image_element:
            image_url = image_element.get('data-lazy-src') or image_element.get('src', placeholder_image)
        else:
            image_url = placeholder_image

        description_element = article.find('a', class_='post-excerpt')
        description = description_element.text.strip() if description_element else ''

        author_element = article.find('div', class_='authors-list')
        author = author_element.text.strip() if author_element else None

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'logo': 'https://techpoint.africa/wp-content/uploads/2022/11/Techpoint-logo-500px-2.png',
            'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
            'image_url': image_url,
            'author': author,
            'website': 'Techpoint Africa',
            # Was the Business Tech Africa URL (copy-paste slip); point at
            # the site that was actually scraped.
            'website_url': url,
            'category': determine_category(title, description),
            'priority': 5,
            'section': 'top'
        })

    return articles
def fetch_news_cio():
    """Scrape the first post-grid item from the CIO Africa home page.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``image_url``,
        ``link``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url``, ``category``, ``priority`` and ``section``. The
        list is empty when the page cannot be fetched or contains no
        matching element.
    """
    url = 'https://cioafrica.co/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"CIO Africa request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='kt-blocks-post-grid-item-inner-wrap kt-feat-image-align-left kt-feat-image-mobile-align-top')[:1]

    for article in article_collection:
        # Look up the wrapper first: chaining .find() on a missing wrapper
        # would raise AttributeError.
        category_wrapper = article.find('div', class_='kt-blocks-above-categories')
        badge_element = category_wrapper.find('a') if category_wrapper else None
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h4', class_='entry-title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='entry-content')
        description = description_element.text.strip() if description_element else ''

        publish_element = article.find('div', class_='kt-blocks-date')
        publish_date = publish_element.text.strip() if publish_element else ''

        image_element = article.find('img')
        image_url = ''
        if image_element:
            image_url = image_element.get('data-src', image_element.get('src', ''))
        if not image_url:
            image_url = 'path/to/your/placeholder.png'

        print(f"Image URL: {image_url}")

        author_element = article.find('div', class_='kt-blocks-post-author')
        author = author_element.text.strip() if author_element else ''

        # The article link is the anchor inside the first <h4>; guard both
        # lookups so a missing heading or href yields '' instead of raising.
        heading_element = article.find('h4')
        link_element = heading_element.find('a') if heading_element else None
        link = link_element.get('href', '') if link_element else ''

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': 'https://uploads-ssl.webflow.com/601f9af34267b5156d0e830f/61ae1184b6ac6936d1af61a3_CIO-20Africa-20logo.png',
            'logo1': 'https://flagpedia.net/data/flags/h80/ke.webp',
            'author': author,
            'website': 'CIO Africa',
            'website_url': url,
            'category': determine_category(title, description),
            'priority': 6,
            'section': 'top'
        })

    return articles


def fetch_news_techlabari():
    """Scrape the first article listed on Techlabari's news page.

    Each article is parsed inside its own ``try`` so that one malformed
    entry is reported and skipped. A failed page request is reported the
    same way and yields an empty list.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``image_url``,
        ``link``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url``, ``category``, ``priority`` and ``section``.
    """
    url = 'https://techlabari.com/news/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
    }

    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        # Keep the scraper best-effort: a network failure means "no news".
        print(f"Techlabari request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []

    article_collection = soup.find_all(
        'div', class_='p-wrap p-grid p-grid-1'
    )[:1]

    for article in article_collection:

        try:
            badge_element = article.find('div', class_='p-categories p-top')
            badge = badge_element.get_text(strip=True) if badge_element else ''

            title_element = article.find('h3', class_='entry-title')
            if not title_element:
                # An entry without a headline is not worth storing.
                continue
            title = title_element.get_text(strip=True)

            link_tag = article.find('a', href=True)
            link = link_tag.get('href') if link_tag else ''

            desc_element = article.find('div', class_='elementor-post__excerpt')
            description = desc_element.get_text(strip=True) if desc_element else None

            publish = article.find('time', class_='date-published')
            publish_date = publish.get_text(strip=True) if publish else 'Recently Added'

            image_url = ''

            # Try the lazy-load attributes first, then src, then the first
            # candidate of srcset.
            img = article.find('img')
            if img:
                image_url = (
                    img.get('data-src') or
                    img.get('data-lazy-src') or
                    img.get('src') or
                    img.get('srcset', '').split(' ')[0] or
                    ''
                )

            # Resolve relative image paths against the page URL.
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            author_element = article.find('span', class_='meta-separate')
            author = author_element.get_text(strip=True) if author_element else 'Techlabari'

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'image_url': image_url,
                'link': link,
                'logo': 'https://techlabari.com/wp-content/uploads/2023/09/TL-Small.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/gh.webp',
                'author': author,
                'website': 'Techlabari',
                'website_url': url,
                'category': determine_category(title, description),
                'priority': 7,
                'section': 'bottom'
            })

        except Exception as e:
            print(f"Techlabari scrape error: {e}")
            continue

    return articles


def fetch_news_newtimes():
    """Scrape the last result of The New Times' "technology" search page.

    Returns:
        A list holding at most one article dict with the keys ``title``,
        ``badge``, ``logo``, ``publish_date``, ``image_url``, ``link``,
        ``author``, ``logo1``, ``description``, ``website``,
        ``website_url``, ``category``, ``priority`` and ``section``. The
        list is empty when the page cannot be fetched or contains no
        matching element.
    """
    url = 'https://www.newtimes.co.rw/search?query=technology'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    website = 'The New Times'

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"The New Times request failed: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    # [-1:] keeps only the last matching element on the page.
    article_collection = soup.find_all('div', class_='col-md-3 article')[-1:]

    for article in article_collection:
        badge_element = article.find('div', class_='article-section')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('div', class_='article-title')
        title = title_element.text.strip() if title_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        publish = article.find('div', class_='article-date')
        publish_date = publish.text.strip() if publish else ' Recently Added'

        # NOTE(review): the description is the text of the first <div> and
        # the author the text of the first <span> inside the article —
        # confirm these selectors against the live markup.
        description_element = article.find('div')
        description = description_element.text.strip() if description_element else ''

        author_element = article.find('span')
        author = author_element.text.strip() if author_element else ''

        # Only anchors that actually carry an href can provide the link.
        link_element = article.find('a', href=True)
        link = link_element['href'] if link_element else ''

        articles.append({
            'title': title,
            'badge': badge,
            # The logo is a fixed asset URL; the scraped header-icons text
            # was always overwritten by it, so that lookup is gone.
            'logo': 'https://www.newtimes.co.rw/theme_newtimes/images/logo1.png',
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            # Fall back to the site name when no author is listed.
            'author': author or website,
            'logo1': 'https://flagpedia.net/data/flags/h80/rw.webp',
            'description': description,
            'website': website,
            'website_url': url,
            'category': determine_category(title, description),
            'priority': 8,
            'section': 'bottom'
        })

    return articles


def fetch_news_pctechmag():
    """Scrape the first post area on the PC Tech Magazine home page.

    Returns:
        list[dict]: At most one article dict with the keys title, badge,
        description, publish_date, image_url, link, author, logo, logo1,
        website, website_url, category, priority and section. An empty
        list is returned when the HTTP request fails.
    """
    url = 'https://pctechmag.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # Without an explicit timeout requests waits indefinitely, so the
        # Timeout handler below could never fire.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        article_collection = soup.find_all(
            'div', class_='cs-posts-area cs-posts-area-elementor')[:1]
        for article in article_collection:
            # Look up the category list first: chaining .find() on a missing
            # <ul> would raise AttributeError, which is not handled below.
            category_list = article.find('ul', class_='post-categories')
            badge_element = category_list.find('a') if category_list else None
            badge = badge_element.text.strip() if badge_element else ''

            title_element = article.find('h2', class_='cs-entry__title')
            title = title_element.text.strip() if title_element else ''

            description_element = article.find('p')
            description = description_element.text.strip() if description_element else ''

            publish = article.find('div', class_='cs-meta-date')
            publish_date = publish.text.strip() if publish else ''

            author_element = article.find('span', class_='cs-meta-author-name')
            author = author_element.text.strip() if author_element else ''

            # .get() tolerates tags that lack the attribute.
            image_element = article.find('img')
            image_url = image_element.get('src') if image_element else None
            link_element = article.find('a')
            link = link_element.get('href', '') if link_element else ''

            logo = 'https://i0.wp.com/pctechmag.com/wp-content/uploads/2015/08/pc-tech-logo-signature.png?ssl=1'
            logo1 = 'https://flagpedia.net/data/flags/h80/ug.webp'
            website = 'Pc Techmag'
            website_url = 'https://pctechmag.com/'
            category = determine_category(title, description)
            priority = 9
            section = 'bottom'
            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'image_url': image_url,
                'link': link,
                'author': author,
                'logo': logo,
                'logo1': logo1,
                'website': website,
                'website_url': website_url,
                'category': category,
                'priority': priority,
                'section': section
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must be
        # handled before the more general clause.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError does not match requests' exception of
        # the same name; the fully qualified class is required here.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []


def fetch_news_techcrunch():
    """Scrape the first card from TechCrunch's Africa tag page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, link, image_url, logo, logo1, author, website,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching card.
    """
    url = 'https://techcrunch.com/tag/africa/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='wp-block-techcrunch-card wp-block-null')[:1]

    for article in article_collection:
        badge_element = article.find(
            'a', class_='is-taxonomy-category has-text-color has-green-900-color wp-block-tenup-post-primary-term has-small-font-size wp-elements-da2f4b8fa1d086a1fe3f2c2891f7e77e')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='loop-card__title')
        title = title_element.text.strip() if title_element else ''

        # Reuse the heading lookup; chaining .find('a') directly would raise
        # AttributeError when the heading is absent.
        link_element = title_element.find('a') if title_element else None
        link = link_element.get('href', '') if link_element else ''

        publish = article.find(
            'time', class_='loop-card__meta-item loop-card__time wp-block-tc23-post-time-ago')
        publish_date = publish.text.strip() if publish else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        author_element = article.find(
            'ul', class_='loop-card__meta-item loop-card__author-list')
        author = author_element.text.strip() if author_element else ''

        # A missing excerpt is reported as None, as before.
        description_element = article.find('p', class_='fz-14 lh-20 c-777')
        description = description_element.text.strip() if description_element else None

        website = 'Techcrunch'
        logo = 'https://techcrunch.com/wp-content/themes/tc-23/dist/svg/tc-logo.svg'
        logo1 = 'https://flagpedia.net/data/flags/h80/us.webp'
        website_url = 'https://techcrunch.com/tag/africa/'
        category = determine_category(title, description)
        priority = 11
        section = 'side'
        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles


def fetch_news_fins():
    """Scrape the first list post from the Fintechnews Africa home page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        image_url, link, logo, logo1, website, publish_date, author,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching post.
    """
    url = 'https://fintechnews.africa/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'article', class_='l-post list-post list-post-on-sm m-pos-left')[:1]

    for article in article_collection:
        badge_element = article.find('span', class_='meta-item cat-labels')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h2', class_='is-title post-title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='excerpt')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('time', class_='post-date')
        publish_date = publish.text.strip() if publish else ''

        # Prefer the lazy-load attribute, then src, then the first srcset entry.
        image_url = ''
        image_element = article.find('img')
        if image_element:
            if image_element.has_attr('data-src'):
                image_url = image_element['data-src']
            elif image_element.has_attr('src'):
                image_url = image_element['src']
            elif image_element.has_attr('srcset'):
                image_url = image_element['srcset'].split(',')[0].split(' ')[0]

        author_element = article.find('span')
        author = author_element.text.strip() if author_element else ''

        # Guard the anchor lookup: subscripting a missing tag raises TypeError.
        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        logo = 'https://fintechnews.africa/wp-content/uploads/2021/03/fintech-africa-logo.png'
        website = 'Fintechnews Africa'
        logo1 = 'https://flagpedia.net/data/flags/h80/gh.webp'
        website_url = 'https://fintechnews.africa/'
        category = determine_category(title, description)
        priority = 12
        section = 'side'
        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'publish_date': publish_date,
            'author': author,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })
    return articles

def fetch_news_techarena():
    """Scrape the first post item from the TechTrends KE "latest" page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, image_url, link, author, logo, logo1, website,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching item.
    """
    url = 'https://techtrendske.co.ke/latest/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('li', class_='post-item')[:1]

    for article in article_collection:
        badge_element = article.find('span', class_='post-cat tie-cat-7')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h2', class_='post-title')
        title = title_element.text.strip() if title_element else ''

        # Derive the link from the heading already found; a direct chained
        # .find('a') would raise AttributeError when the heading is missing.
        link_element = title_element.find('a') if title_element else None
        link = link_element.get('href', '') if link_element else ''

        description_element = article.find('p', class_='post-excerpt')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('span', class_='date meta-item tie-icon')
        publish_date = publish.text.strip() if publish else ''

        author_element = article.find('span', class_='meta-author')
        author = author_element.text.strip() if author_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        logo = 'https://techtrendske.co.ke/wp-content/uploads/2024/06/Asset-11mdpi.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/ke.webp'
        website = 'techtrends'
        website_url = 'https://techtrendske.co.ke/latest/'
        category = determine_category(title, description)
        priority = 13
        section = 'top'
        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'author': author,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles

def fetch_news_techloy():
    """Scrape the first post from Techloy's news tag page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, publish_date,
        image_url, link, logo, logo1, author, website, description,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching post.
    """
    url = 'https://www.techloy.com/tag/news/'
    # Site root used to resolve the post's href into an absolute URL.
    base_url = 'https://www.techloy.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='gh-archive-page-post gh-posts-feed-post')[:1]

    for article in article_collection:
        badge_element = article.find('a', class_='gh-tag-link')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h2', class_='gh-archive-page-post-title')
        title = title_element.text.strip() if title_element else ''

        publish = article.find('time', class_='gh-post-info__date')
        publish_date = publish.text.strip() if publish else ''

        description_element = article.find('div', class_='post-excerpt')
        description = description_element.text.strip() if description_element else ''

        # Image: try the <img> attributes first, then an inline
        # background-image on the wrapper link.
        image_url = ''
        img_element = article.find('img')
        if img_element:
            image_url = (
                img_element.get('src') or
                img_element.get('data-src') or
                img_element.get('lazy-src') or
                ''
            )

        a_element = article.find('a', class_='gh-archive-page-post-image-wrapper-link')
        if not image_url and a_element:
            style_attr = a_element.get('style')
            if style_attr and 'background-image' in style_attr:
                image_url = style_attr.split('background-image: url(')[-1].split(')')[0].strip().strip('"').strip("'")

        if image_url and not image_url.startswith('http'):
            image_url = urljoin(url, image_url)

        author_element = article.find('div', class_='gh-post-info__author-wrapper')
        author = author_element.text.strip() if author_element else ''

        # Resolve the href against the site root. The previous code prefixed
        # the display name ('Techloy') to the href, producing an invalid URL.
        link_element = title_element.find('a') if title_element else None
        href = link_element.get('href', '') if link_element else ''
        link = urljoin(base_url, href) if href else ''

        logo = 'https://gigalayer.com/clients/companies/logo-techloy.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/ng.webp'
        website = 'Techloy'
        author = author or website
        website_url = 'https://www.techloy.com/tag/news/'
        category = determine_category(title, description)
        priority = 14
        section = 'side'
        articles.append({
            'title': title,
            'badge': badge,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'description': description,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles

def fetch_news_construction():
    """Scrape the first article card from Construction Review's Africa news page.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, image_url, link, logo, logo1, author, website,
        website_url, category, priority, section). An empty list is
        returned when the page cannot be fetched or any error occurs while
        parsing it.
    """
    url = 'https://constructionreviewonline.com/region/news/africa/'
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    )

    response = safe_get(url, headers={'User-Agent': user_agent})
    if not response:
        print("[⚠️] Skipping ConstructionReview — could not fetch data.")
        return []

    def stripped_text(node, fallback):
        # Text of a found tag with surrounding whitespace removed, or the
        # fallback when the lookup returned nothing.
        if node:
            return node.text.strip()
        return fallback

    try:
        page = BeautifulSoup(response.text, 'html.parser')
        articles = []

        for card in page.find_all('div', class_='inside-article')[:1]:
            badge = stripped_text(
                card.find('span', class_='post-term-item term-news'), '')
            title = stripped_text(
                card.find('h2', class_='gb-headline gb-headline-84923e14 gb-headline-text'), '')

            # Subscript access is deliberate: a tag without the attribute
            # raises and is reported by the handler below.
            img_tag = card.find('img')
            image_url = None
            if img_tag:
                image_url = img_tag['src']

            description = stripped_text(
                card.find('div', class_='dynamic-entry-excerpt'), None)
            publish_date = stripped_text(
                card.find('div', class_='gb-headline gb-headline-6df4d39e gb-headline-text'),
                'Recently Added')
            author = stripped_text(
                card.find('span', class_='text-primary'), '')

            anchor = card.find('a')
            link = '#'
            if anchor:
                link = anchor['href']

            website = 'Constructionreview'
            category = determine_category(title, description)

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'image_url': image_url,
                'link': link,
                'logo': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQWGrc39lcGRxWgA5VqjvlR6cIFSL3_0Yo5vA&s',
                'logo1': 'https://flagpedia.net/data/flags/h80/ke.webp',
                # Fall back to the site name when no author is listed.
                'author': author or website,
                'website': website,
                'website_url': url,
                'category': category,
                'priority': 15,
                'section': 'side'
            })

        print(f"[✅] Fetched {len(articles)} article(s) from ConstructionReview.")
        return articles

    except Exception as e:
        print(f"[⚠️] Error parsing ConstructionReview: {e}")
        return []


def fetch_news_teche():
    """Scrape the first block container from the Techeconomy home page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, image_url, link, logo, logo1, website, author,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching container.
    """
    url = 'https://techeconomy.ng/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='jeg_block_container')[:1]

    for article in article_collection:
        # Two-step lookup: chaining .find('span') on a missing category div
        # would raise AttributeError.
        category_box = article.find('div', class_='jeg_post_category')
        badge_element = category_box.find('span') if category_box else None
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='jeg_post_title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='jeg_post_excerpt')
        description = description_element.text.strip() if description_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        # Guard the anchor lookup: subscripting a missing tag raises TypeError.
        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        publish = article.find('div', class_='jeg_meta_date')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        author_element = article.find('dd')
        author = author_element.text.strip() if author_element else ''

        website = 'techeconomy'
        logo = 'https://techeconomy.ng/wp-content/uploads/2025/06/Full-Logo_Blue-with-Tagline-1.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/ng.webp'
        # Fall back to the site name when no author is listed.
        author = author or website
        website_url = 'https://techeconomy.ng/'
        category = determine_category(title, description)
        priority = 16
        section = 'top'
        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'author': author,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles


# def fetch_news_teche():
#     url = 'https://techeconomy.ng/'
#     headers = {
#         'User-Agent': 'Mozilla/5.0'
#     }

#     response = requests.get(url, headers=headers)
#     soup = BeautifulSoup(response.text, 'html.parser')

#     articles = []

#     article_collection = soup.select('article.jeg_post')

#     print("FOUND ARTICLES:", len(article_collection))  #

#     for article in article_collection[:1]:  

#         if not article:
#             continue

       
#         title = ''
#         link = ''
#         title_container = article.find('h3', class_='jeg_post_title')
#         if title_container:
#             a_tag = title_container.find('a')
#             if a_tag:
#                 title = a_tag.get_text(strip=True)
#                 link = a_tag.get('href', '')

     
#         description = ''
#         desc_tag = article.find('div', class_='jeg_post_excerpt')
#         if desc_tag:
#             description = desc_tag.get_text(strip=True)

     
#         image_url = None
#         img_tag = article.find('img')
#         if img_tag:
#             image_url = img_tag.get('src')

#         publish_date = 'Recently Added'
#         date_tag = article.find('div', class_='jeg_meta_date')
#         if date_tag:
#             publish_date = date_tag.get_text(strip=True)

#         author = ''
#         author_tag = article.find('span', class_='jeg_meta_author')
#         if author_tag:
#             author = author_tag.get_text(strip=True)

#         website = 'techeconomy'
#         author = author or website

#         articles.append({
#             'title': title,
#             'description': description,
#             'publish_date': publish_date,
#             'image_url': image_url,
#             'link': link,
#             'logo': 'https://techeconomy.ng/wp-content/uploads/2025/06/Full-Logo_Blue-with-Tagline-1.png',
#             'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
#             'website': website,
#             'author': author,
#             'website_url': url,
#             'category': determine_category(title, description),
#             'priority': 16,
#             'section': 'top'
#         })

#     return articles

def fetch_news_app():
    """Scrape the first archive entry from the Apps Africa latest-news page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, link, image_url, logo, logo1, author, website,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching entry. ``link`` is None
        when no image carrying a ``data-permalink`` attribute is found.
    """
    url = 'https://www.appsafrica.com/latest-news/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='archive-list-out')[:1]

    for article in article_collection:
        badge_element = article.find('span', class_='side-list-cat')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h2')
        title = title_element.text.strip() if title_element else ''

        # Store the element's text; the previous code kept the Tag object
        # itself, which is not a usable date string.
        publish = article.find('time')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        author_element = article.find('span', class_='post_author_name')
        author = author_element.text.strip() if author_element else ''

        # Two-step lookup: chaining .find('p') on a missing text container
        # would raise AttributeError. A missing excerpt stays None.
        text_box = article.find('div', class_='archive-list-text left relative')
        description_element = text_box.find('p') if text_box else None
        description = description_element.text.strip() if description_element else None

        # The article URL is carried on an image's data-permalink attribute.
        link = None
        link_element = article.find("img", {"data-permalink": True})
        if link_element:
            link = link_element.get("data-permalink")

        logo = 'https://www.appsafrica.com/wp-content/uploads/2017/04/appsafrica_africatechinsight-1.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/gb.webp'
        website = 'Apps Africa'
        website_url = 'https://www.appsafrica.com/latest-news/'
        # Fall back to the site name when no author is listed.
        author = author or website
        category = determine_category(title, description)
        priority = 17
        section = 'bottom'
        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles

def fetch_news_village():
    """Scrape the first grid post from the Innovation Village home page.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, link, image_url, logo, logo1, author, website,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching post.
    """
    url = 'https://innovation-village.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('article', class_='l-post grid-post grid-base-post')[:1]

    for article in article_collection:
        title_element = article.find('h2', class_='is-title post-title')
        title = title_element.text.strip() if title_element else ''

        badge_element = article.find(
            'span', class_='cat-labels cat-labels-overlay c-overlay p-bot-left')
        badge = badge_element.text.strip() if badge_element else ''

        publish = article.find('time', class_='post-date')
        publish_date = publish.text.strip() if publish else ''

        # A missing excerpt is reported as None, as before.
        description_element = article.find('div', class_='excerpt')
        description = description_element.text.strip() if description_element else None

        # Select a span that actually carries data-bgsrc. Subscripting the
        # first <span> raised KeyError whenever it lacked the attribute,
        # which also made the fallback below unreachable in that case.
        image_element = article.find('span', attrs={'data-bgsrc': True})
        image_url = image_element.get('data-bgsrc') if image_element else None

        # Fallback: pull the URL out of an inline background-image style.
        if not image_url:
            style_element = article.find('div', style=re.compile(r'background-image:\s*url\((.*?)\)'))
            if style_element:
                background_url_match = re.search(r'url\((.*?)\)', style_element['style'])
                if background_url_match:
                    image_url = background_url_match.group(1).strip('"\'')

        author_element = article.find('span', class_='meta-item post-author')
        author = author_element.text.strip() if author_element else ''

        # Guard the anchor lookup: subscripting a missing tag raises TypeError.
        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        logo = 'https://images.crunchbase.com/image/upload/c_pad,h_256,w_256,f_auto,q_auto:eco,dpr_1/knkcd79vpj9bvxjgbkmp'
        logo1 = 'https://flagpedia.net/data/flags/h80/ng.webp'
        website = 'Innovation Village'
        website_url = 'https://innovation-village.com/'
        category = determine_category(title, description)
        priority = 18
        section = 'top'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles
def fetch_news_upstream():
    """Scrape one entry from Campus Technology's artificial-intelligence list.

    Only the first ``<div>`` of the page is inspected. The result is
    labelled with the website name 'Upstream'.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, image_url, link, logo, logo1, author, website,
        website_url, category, priority, section). An empty list is
        returned when the page cannot be fetched or any error occurs while
        parsing it.
    """
    url = 'https://campustechnology.com/articles/list/artificial-intelligence.aspx'
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = safe_get(url, headers=headers)
    if not response:
        print("[⚠️] Skipping UpstreamOnline — connection failed.")
        return []

    try:
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []
        article_collection = soup.find_all('div')[:1]

        for article in article_collection:
            title_element = article.find('h3', class_='title')
            title = title_element.text.strip() if title_element else ''
            link_tag = title_element.find('a') if title_element else None

            link = link_tag.get('href', '') if link_tag else ''
            # urljoin resolves relative hrefs against the page URL and leaves
            # absolute ones untouched. Plain concatenation appended the path
            # to the '.aspx' file name and produced a malformed URL.
            full_link = urljoin(url, link)

            # A missing summary is reported as None, as before.
            description_element = article.find('p', class_='summary')
            description = description_element.text.strip() if description_element else None

            date_tag = article.find('li', class_='nodate')
            publish_date = date_tag.text.strip() if date_tag else 'Recently Added'

            author_element = article.find('div', class_='jeg_post_author')
            author = author_element.text.strip() if author_element else ''

            website_url = 'https://campustechnology.com/articles/list/artificial-intelligence.aspx'
            image_tag = article.find('img')
            image_url = image_tag.get('src') if image_tag else None

            articles.append({
                'title': title,
                'badge': '',
                'description': description,
                'publish_date': publish_date,
                'image_url': image_url,
                'link': full_link,
                'logo': 'https://campustechnology.com/articles/list/~/media/edu/campustechnology/campustechnologywhitetagline.ashx',
                'logo1': 'https://flagpedia.net/data/flags/h80/ke.webp',
                'author': author,
                'website': 'Upstream',
                'website_url': website_url,
                'category': determine_category(title, description),
                'priority': 19,
                'section': 'bottom'
            })

        print(f"[✅] Fetched {len(articles)} article(s) from UpstreamOnline.")
        return articles

    except Exception as e:
        print(f"[⚠️] Error parsing UpstreamOnline: {e}")
        return []


def fetch_news_afrimirror():
    """Scrape the first post from The African Mirror's science & tech category.

    The page is fetched through ``safe_get`` so the request has a timeout
    and network/HTTP errors yield an empty result instead of propagating.

    Returns:
        list[dict]: At most one article dict (title, badge, description,
        publish_date, link, image_url, logo, logo1, author, website,
        website_url, category, priority, section); empty when the page
        cannot be fetched or contains no matching post. ``description`` is
        capped at 350 characters.
    """
    url = 'https://theafricanmirror.africa/category/science-tech-and-innovation/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = safe_get(url, headers=headers)
    if response is None:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='ht-post-wrapper ht-clearfix')[:1]

    for article in article_collection:
        # Each nested lookup is done in two steps so a missing container
        # yields an empty value instead of an AttributeError.
        categories_element = article.find('span', class_='entry-categories')
        badge_element = categories_element.find('a') if categories_element else None
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='entry-title')
        title = title_element.text.strip() if title_element else ''
        link_element = title_element.find('a') if title_element else None
        link = link_element.get('href', '') if link_element else ''

        description_element = article.find('div', class_='entry-content')
        # Slicing is a no-op for short text, so no length check is needed.
        description = description_element.text.strip()[:350] if description_element else ''

        publish = article.find('div', class_='ht-post-date')
        publish_date = publish.text.strip() if publish else ''

        thumb_container = article.find('div', class_='entry-thumb-container')
        image_element = thumb_container.find('img') if thumb_container else None
        image_url = image_element.get('src') if image_element else None

        author_element = article.find('span', class_='author vcard')
        author = author_element.text.strip() if author_element else ''

        logo = 'https://theafricanmirror.africa/wp-content/uploads/2023/02/TAM-NAME_494x80px_BW-For-Africa-Always_redline.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/za.webp'
        website = 'The African Mirror'
        website_url = 'https://theafricanmirror.africa/category/science-tech-and-innovation/'
        category = determine_category(title, description)
        priority = 20
        section = 'top'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section
        })

    return articles

def fetch_news_egypt():
    """
    Scrape the first IT & telecom card from Daily News Egypt.

    Downloads the category page and converts the first
    ``<div class="p-wrap p-grid p-grid-1">`` element into an article dict.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url``, ``category``,
        ``priority`` and ``section``. Missing text fields are ``''``
        (``description`` and ``image_url`` are ``None`` when absent).

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://www.dailynewsegypt.com/category/business/it-a-telecom/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    # Only the first matching card is kept.
    article_collection = soup.find_all(
        'div', class_='p-wrap p-grid p-grid-1')[:1]

    for article in article_collection:
        badge_element = article.find('div', class_='p-categories p-top')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='entry-title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('p', class_='entry-summary')
        description = (
            description_element.text.strip() if description_element else None
        )

        publish = article.find('span', class_='meta-el meta-update')
        publish_date = publish.text.strip() if publish else ''

        # find() returns None when the tag is absent, so guard before
        # reading attributes instead of subscripting the result directly.
        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        author_element = article.find('span', class_='meta-el meta-author')
        author = author_element.text.strip() if author_element else ''

        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        logo = 'https://d1b3667xvzs6rz.cloudfront.net/2023/10/Dailynews-logo.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/eg.webp'
        website = 'Daily News'
        website_url = 'https://www.dailynewsegypt.com/category/business/it-a-telecom/'
        category = determine_category(title, description)
        priority = 21
        section = 'side'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section,
        })

    return articles

def fetch_news_techview():
    """
    Scrape the first construction story from African Review.

    Downloads the construction page and converts the first
    ``<div class="system-unpublished">`` element into an article dict.
    Image and link URLs are resolved against the page URL, so relative
    paths always point at the site that was actually scraped.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``image_url``, ``link``,
        ``author``, ``logo``, ``logo1``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://africanreview.com/construction'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='system-unpublished')[:1]

    for article in article_collection:
        badge_element = article.find('div', class_='post-categories')
        badge = badge_element.text.strip() if badge_element else ''

        description_element = article.find('section', class_='article-intro')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('div', class_='post-date')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        author_element = article.find('div', class_='post-author')
        author = author_element.text.strip() if author_element else ''

        # The image lives in <span itemprop="image"><img itemprop="url">.
        image_url = ''
        image_span = article.find('span', itemprop='image')
        if image_span:
            image_element = image_span.find('img', itemprop='url')
            if image_element:
                image_url = image_element.get('src', '')
        if image_url:
            # urljoin leaves absolute URLs untouched and avoids the double
            # slash that plain concatenation produced for "/path" values.
            image_url = urljoin(url, image_url)

        # Title and link both come from the anchor inside the first <h2>.
        heading = article.find('h2')
        a_tag = heading.find('a') if heading else None
        title = a_tag.get_text(strip=True) if a_tag else ''
        # Resolve against the scraped page, not an unrelated domain.
        link = urljoin(url, a_tag['href']) if a_tag and a_tag.has_attr('href') else ''

        logo = 'https://africanreview.com/templates/ja_teline_v/images/logo.svg'
        logo1 = 'https://flagpedia.net/data/flags/h80/zw.webp'
        website = 'Africa Review'
        author = author or website  # fall back to the site name
        website_url = 'https://africanreview.com/construction'
        category = determine_category(title, description)
        priority = 22
        section = 'bottom'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'author': author,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section,
        })

    return articles

def fetch_news_datacenter():
    """
    Scrape the first Africa-tagged story from Fintech Global.

    Downloads the tag page and converts the first
    ``<div class="td-block-span6">`` element into an article dict.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url``, ``category``,
        ``priority`` and ``section``. Missing fields are ``''``.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://fintech.global/tag/africa/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='td-block-span6')[:1]

    for article in article_collection:
        # Guard each step: find() yields None when the wrapper is missing.
        thumb_element = article.find('div', class_='td-module-thumb')
        badge_element = thumb_element.find('a') if thumb_element else None
        badge = badge_element.text.strip() if badge_element else ''

        publish_element = article.find(
            'time', class_='entry-date updated td-module-date')
        publish_date = publish_element.text.strip() if publish_element else ''

        title_element = article.find('h3', class_='entry-title td-module-title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='td-excerpt')
        description = description_element.text.strip() if description_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src', '') if image_element else ''

        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        author_element = article.find('span', class_='cs-meta-author-name')
        author = author_element.text.strip() if author_element else ''

        # The logo is a fixed asset; no per-article lookup is needed.
        logo = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRv6-PWLL_NA5Nfd5lKc_FCJ2pLHWIKN_K0NA&s'
        logo1 = 'https://flagpedia.net/data/flags/h80/gb.webp'
        website_url = 'https://fintech.global/tag/africa/'
        website = 'Fintech Global'
        category = determine_category(title, description)
        # NOTE(review): priority is a string here; confirm consumers do not
        # expect an int before normalising it.
        priority = '23'
        section = 'bottom'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section,
        })

    return articles
def fetch_news_coinweez():
    """
    Scrape the first featured story from the Memeburn home page.

    Converts the first ``<div class="col-lg-10 offset-lg-1">`` element into
    an article dict. The image is taken from an ``<img src>`` when present,
    otherwise from a CSS ``url(...)`` in the ``style`` attribute of
    ``<div class="img">``.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``image_url``, ``link``,
        ``author``, ``logo``, ``logo1``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``. ``image_url`` and
        ``link`` are ``None`` when they cannot be found.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://memeburn.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='col-lg-10 offset-lg-1')[:1]

    for article in article_collection:
        badge_element = article.find('span', class_='category')
        badge = badge_element.text.strip() if badge_element else ''

        description_element = article.find('p')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('span', class_='posted-on')
        publish_date = publish.text.strip() if publish else ''

        author_element = article.find('span', class_='author vcard')
        author = author_element.text.strip() if author_element else ''

        image_url = None
        image_element = article.find('img')
        if image_element and image_element.get('src'):
            image_url = image_element['src']
        else:
            # Fall back to a CSS background image on <div class="img">.
            div = article.find('div', class_='img')
            if div and 'style' in div.attrs:
                match = re.search(r'url\((.*?)\)', div['style'])
                if match:
                    # CSS allows url("...") / url('...'); drop the quotes so
                    # the stored value is a usable URL.
                    image_url = match.group(1).strip().strip('\'"')

        # Title text and link both come from the entry-title heading.
        title_element = article.find('h2', class_='entry-title')
        if title_element:
            title = title_element.get_text(strip=True)
            link_tag = title_element.find('a')
            link = link_tag['href'] if link_tag and 'href' in link_tag.attrs else None
        else:
            title = ''
            link = None

        logo = 'https://memeburn.com/wp-content/uploads/2019/04/ic_memeburn.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/za.webp'
        website = 'Memeburn'
        author = author or website  # fall back to the site name
        # NOTE(review): website_url points at coinweez.com although the page
        # scraped is memeburn.com; confirm which is intended.
        website_url = 'https://coinweez.com/category/news/'
        category = determine_category(title, description)
        # NOTE(review): priority is a string here; confirm consumers do not
        # expect an int before normalising it.
        priority = '24'
        section = 'bottom'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'author': author,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section,
        })

    return articles


def fetch_news_shega():
    """
    Scrape the first analysis entry from Shega.

    Downloads the analysis page and converts the first
    ``<section class="w-full">`` element into an article dict. Relative
    image and link URLs are resolved against the page URL.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``,
        ``description``, ``publish_date``, ``image_url``, ``link``,
        ``author``, ``logo``, ``logo1``, ``website``, ``badge``,
        ``website_url`` and ``category``. ``image_url`` and ``link`` are
        ``None`` when they cannot be found.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://shega.co/analysis'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'section', class_='w-full')[:1]

    for article in article_collection:
        # NOTE(review): badge, title and link are all read from the first
        # <a> in the section; confirm that is the intended element.
        first_anchor = article.find('a')
        badge = first_anchor.text.strip() if first_anchor else ''
        title = first_anchor.get_text(strip=True) if first_anchor else ''

        description_element = article.find('p', class_='text-gray')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('div', class_='flex-item')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        # The author text is read from the first <p> in the section.
        author_element = article.find('p')
        author = author_element.text.strip() if author_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None
        if image_url and not image_url.startswith('http'):
            image_url = urljoin(url, image_url)

        href = first_anchor.get('href') if first_anchor else None
        # urljoin turns "/path" into "https://shega.co/path" and leaves
        # absolute URLs unchanged.
        link = urljoin(url, href) if href else None

        logo = 'https://shega.co/_next/image?url=%2Flogo.png&w=128&q=75'
        logo1 = 'https://flagpedia.net/data/flags/h80/et.webp'
        website = 'Shega'
        author = author or website  # fall back to the site name
        website_url = 'https://shega.co/'
        category = determine_category(title, description)

        articles.append({
            'title': title,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            # Store the resolved URL itself; prefixing it with the site
            # name produced "Shegahttps://..." and failed on a None link.
            'link': link,
            'author': author,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'badge': badge,
            'website_url': website_url,
            'category': category,
        })

    return articles
def fetch_news_wallstreet():
    """
    Scrape the first list item from the Technomag home page.

    Converts the first ``<div class="list-item">`` element into an article
    dict. The image is taken from an ``<img>`` (``src``, ``data-src`` or
    ``lazy-src``) and, failing that, from a ``background-image`` style on
    ``<a class="gh-archive-page-post-image-wrapper-link">``.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``image_url``, ``link``,
        ``author``, ``logo``, ``logo1``, ``website``, ``website_url`` and
        ``category``. Missing fields are ``''``.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://technomag.co.zw/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='list-item')[:1]

    for article in article_collection:
        badge_element = article.find('a', class_='cat-3')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='post__title')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='excerpt')
        description = description_element.text.strip() if description_element else ''

        publish = article.find('time', class_='time published')
        publish_date = publish.text.strip() if publish else ''

        author_element = article.find('span', class_='entry-author')
        author = author_element.text.strip() if author_element else ''

        image_url = ''

        # Prefer an <img>, trying the attributes lazy-loaders commonly use.
        img_element = article.find('img')
        if img_element:
            image_url = (
                img_element.get('src') or
                img_element.get('data-src') or
                img_element.get('lazy-src') or
                ''
            )

        # Otherwise look for a background image on the wrapper link.
        a_element = article.find('a', class_='gh-archive-page-post-image-wrapper-link')
        if not image_url and a_element:
            style_attr = a_element.get('style')
            if style_attr and 'background-image' in style_attr:
                # Take the text between "url(" and ")" and drop any quotes.
                image_url = style_attr.split('background-image: url(')[-1].split(')')[0].strip().strip('"').strip("'")

        # Make relative image paths absolute.
        if image_url and not image_url.startswith('http'):
            image_url = urljoin(url, image_url)

        # find() returns None when there is no anchor; do not subscript it.
        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        logo = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTv5nbtiiM8poCRk5PoBC7pM0jpfGv8vLUANA&s'
        logo1 = 'https://flagpedia.net/data/flags/h80/zw.webp'
        # NOTE(review): the site name does not match the scraped domain
        # (technomag.co.zw); confirm before changing, it may be a stored key.
        website = 'kenyan Wall Street'
        website_url = 'https://technomag.co.zw/'
        category = determine_category(title, description)

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'author': author,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'website_url': website_url,
            'category': category,
        })

    return articles

def fetch_news_afro():
    """
    Scrape the first technology story from AfroTech.

    Downloads the category page and converts the first
    ``<div class="lg:flex lg:border-b lg:border-neutral-light-gray">``
    element into an article dict. Image URLs wrapped in a ``?url=`` query
    parameter are unwrapped to the underlying image address.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``image_url``, ``link``, ``logo``,
        ``logo1``, ``website``, ``author``, ``website_url``, ``category``,
        ``priority`` and ``section``. Missing fields are ``''``.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://afrotech.com/category/technology'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    def _unwrap_image_url(candidate):
        """Return the ``url`` query value of *candidate*, or *candidate* itself."""
        query = urllib.parse.parse_qs(urllib.parse.urlparse(candidate).query)
        if 'url' in query:
            # parse_qs already percent-decodes values; decoding a second
            # time would corrupt URLs that legitimately contain escapes.
            return query['url'][0]
        return candidate

    articles = []
    article_collection = soup.find_all(
        'div', class_='lg:flex lg:border-b lg:border-neutral-light-gray')[:1]

    for article in article_collection:
        # NOTE(review): badge and author are both read from the first
        # <span>, so they carry the same text; confirm the intended source.
        first_span = article.find('span')
        badge = first_span.text.strip() if first_span else ''
        author = first_span.text.strip() if first_span else ''

        description_element = article.find('span', class_='line-clamp-2')
        description = description_element.text.strip() if description_element else ''

        # Start from an empty value so the field exists even without an
        # <img> or a usable src/srcset.
        image_url = ''
        image_element = article.find('img')
        if image_element:
            src = image_element.get('src')
            if src:
                image_url = _unwrap_image_url(src)
            if not image_url:
                srcset = image_element.get('srcset')
                if srcset:
                    # First candidate of the srcset, without its descriptor.
                    first_src = srcset.split(',')[0].strip().split(' ')[0]
                    image_url = _unwrap_image_url(first_src)

        title_element = article.find('a', class_='text-h3-mobile')
        title = title_element.get_text(strip=True) if title_element else ''

        # The link is the first anchor in the card, resolved against the
        # page URL (plain concatenation produced "afrotech.com//path").
        link_element = article.find('a')
        href = link_element.get('href') if link_element else None
        link = urljoin(url, href) if href else ''

        publish = article.find('p', class_='text-label-mobile uppercase lg:text-label-desktop')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        # NOTE(review): website and website_url do not match the scraped
        # domain (afrotech.com); confirm before changing stored values.
        website = 'AfricaTech'
        logo = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSffzlR1LaqFli4ufyjNCZgdWYEHfN8Z6QfUg&s'
        logo1 = 'https://flagpedia.net/data/flags/h80/us.webp'
        author = author or website  # fall back to the site name
        website_url = 'https://techeconomy.ng/'
        category = determine_category(title, description)
        priority = 16
        section = 'bottom'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'author': author,
            'website_url': website_url,
            'category': category,
            'priority': priority,
            'section': section,
        })

    return articles

def fetch_news_angola():
    """
    Scrape the first story row from the Menos Fios English home page.

    Converts the first ``<div class="td-block-row">`` element into an
    article dict. Every nested lookup is guarded, so a card with missing
    markup yields empty fields instead of raising.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``image_url``, ``link``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url`` and ``category``.
        ``description`` and ``image_url`` are ``None`` when absent.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://www.menosfios.com/en'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='td-block-row')[:1]

    for article in article_collection:
        badge_element = article.find('a', class_='td-post-category')
        badge = badge_element.text.strip() if badge_element else ''

        # Title text is the anchor inside the module heading.
        heading = article.find('h3', class_='entry-title td-module-title')
        title_element = heading.find('a') if heading else None
        title = title_element.text.strip() if title_element else ''

        description_element = article.find(
            'div', class_='desc-content text-limit-4 mb-12')
        description = (
            description_element.text.strip() if description_element else None
        )

        date_wrapper = article.find('span', class_='td-post-date')
        publish = date_wrapper.find('time') if date_wrapper else None
        publish_date = publish.text.strip() if publish else ''

        image_wrapper = article.find('div', class_='td-module-image')
        image_element = image_wrapper.find('img') if image_wrapper else None
        # .get() returns None when the attribute is absent.
        image_url = image_element.get('data-img-url') if image_element else None

        author_element = article.find('span', class_='td-post-author-name')
        author = author_element.text.strip() if author_element else ''

        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        website = 'Menosfios'
        logo = 'https://www.menosfios.com/wp-content/uploads/2014/12/logo-retina-2.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/ao.webp'
        website_url = 'https://www.menosfios.com/en'
        category = determine_category(title, description)

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
        })

    return articles
def fetch_news_namibia():
    """
    Scrape the first "technology" search result from The Namibian.

    Converts the first
    ``<div class="col-sm-12 col-md-3 wp-block-ps-post-category">`` element
    into an article dict. Every nested lookup is guarded, so a card with
    missing markup yields empty fields instead of raising.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url`` and ``category``.
        Missing fields are ``''``.

    Raises:
        requests.exceptions.RequestException: Propagated from
        ``requests.get`` when the page cannot be fetched.
    """
    url = 'https://www.namibian.com.na/?s=technology'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all(
        'div', class_='col-sm-12 col-md-3 wp-block-ps-post-category')[:1]

    for article in article_collection:
        # The badge is the first anchor inside the post-content wrapper.
        content_element = article.find('div', class_='post-content')
        badge_element = content_element.find('a') if content_element else None
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find(
            'h3', class_='ps-no-underline ps-text-base ps-leading-tight ps-font-semibold ps-mb-1')
        title = title_element.text.strip() if title_element else ''

        description_element = article.find('div', class_='td-excerpt')
        description = description_element.text.strip() if description_element else ''

        publish_times = article.find('time', class_='ps-text-xs')
        publish_date = publish_times.text.strip() if publish_times else ''

        # NOTE(review): the author is read from the first <time> element,
        # which looks like a date rather than a name; confirm the selector.
        author_element = article.find('time')
        author = author_element.text.strip() if author_element else ''

        image_element = article.find('img')
        image_url = image_element.get('src', '') if image_element else ''

        link_element = article.find(
            'a', class_='ps-no-underline ps-leading-tight ps-text-blockBlack')
        link = link_element.get('href', '') if link_element else ''

        logo = 'https://d4f7y6nbupj5z.cloudfront.net/wp-content/uploads/2024/01/Namibian_logo-1-1.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/na.webp'
        website = 'Namibian'
        category = determine_category(title, description)
        # NOTE(review): website_url does not match the scraped domain
        # (namibian.com.na); confirm before changing stored values.
        website_url = 'https://www.africatechschools.com/'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
        })

    return articles

def fetch_news_spaces():
    """
    Scrape the first story block from the IT News Africa home page.

    Converts the first ``<div class="td_block_inner td-mc1-wrap">`` element
    into an article dict. Request failures are reported with ``print`` and
    result in an empty list rather than an exception.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url`` and ``category``.
        ``image_url`` is ``None`` when no image can be found.
    """
    url = 'https://www.itnewsafrica.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Only the HTTP call can raise the request errors handled here, so the
    # try block is limited to it.
    try:
        response = requests.get(url, headers=headers)
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
        return []
    except SSLError:
        # Must precede the ConnectionError clause: SSLError is a subclass
        # of requests' ConnectionError.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
        return []
    except (requests.exceptions.ConnectionError, ConnectionError):
        # requests raises its own ConnectionError, which is unrelated to the
        # builtin of the same name; catch both.
        print(f"Error: Connection error occurred while trying to access {url}.")
        return []
    except RequestException as e:
        print(f"Error: An error occurred: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('div', class_='td_block_inner td-mc1-wrap')
    for article in article_collection[:1]:
        badge_element = article.find('a', class_='td-post-category')
        badge = badge_element.text.strip() if badge_element else ''

        title_element = article.find('h3', class_='entry-title td-module-title')
        title = title_element.text.strip() if title_element else ''

        publish = article.find(
            'time', class_='entry-date updated td-module-date')
        publish_date = publish.text.strip() if publish else ''

        # Prefer <img src>; otherwise use a <span data-img-url="...">.
        image_element = article.find('img')
        if image_element and image_element.get('src'):
            image_url = image_element['src']
        else:
            span_element = article.find('span', attrs={'data-img-url': True})
            image_url = span_element['data-img-url'] if span_element else None

        # The description is the text of the first <p> in the block.
        description_element = article.find('p')
        description = description_element.text.strip() if description_element else ''

        # find() returns None when there is no anchor; do not subscript it.
        link_element = article.find('a')
        link = link_element.get('href', '') if link_element else ''

        author_element = article.find('span', class_='attribution-and-updated_author-text__tITz2')
        author = author_element.text.strip() if author_element else ''

        logo = 'https://www.itnewsafrica.com/wp-content/uploads/2025/08/full-hd_png_logo_white.png'
        logo1 = 'https://flagpedia.net/data/flags/h80/za.webp'
        website_url = 'https://www.itnewsafrica.com/'
        website = 'It News Africa'
        category = determine_category(title, description)

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
        })

    return articles


def fetch_news_space():
    """
    Scrape the first content preview from the Connecting Africa home page.

    Converts the first ``<div class="ContentPreview">`` element into an
    article dict. Title and link are read from the same anchor inside that
    element. Request failures are reported with ``print`` and result in an
    empty list rather than an exception.

    Returns:
        list[dict]: Zero or one dicts with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``, ``logo``,
        ``logo1``, ``author``, ``website``, ``website_url`` and ``category``.
        ``link`` and ``image_url`` are ``None`` when they cannot be found.
    """
    url = 'https://www.connectingafrica.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Only the HTTP call can raise the request errors handled here, so the
    # try block is limited to it.
    try:
        response = requests.get(url, headers=headers)
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
        return []
    except SSLError:
        # Must precede the ConnectionError clause: SSLError is a subclass
        # of requests' ConnectionError.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
        return []
    except (requests.exceptions.ConnectionError, ConnectionError):
        # requests raises its own ConnectionError, which is unrelated to the
        # builtin of the same name; catch both.
        print(f"Error: Connection error occurred while trying to access {url}.")
        return []
    except RequestException as e:
        print(f"Error: An error occurred: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    article_collection = soup.find_all('div', class_='ContentPreview')
    for article in article_collection[:1]:
        badge_element = article.find('a', class_='td-post-category')
        badge = badge_element.text.strip() if badge_element else ''

        # Search inside this article (not the whole page) so the title
        # always belongs to the same anchor the link is taken from.
        title_element = article.find('a', class_='ListPreview-Title')
        title = title_element.get_text(strip=True) if title_element else ''
        link = title_element.get('href') if title_element else None
        # Make site-relative links absolute.
        if link and link.startswith('/'):
            link = 'https://www.connectingafrica.com' + link

        # Lazy-loaded images keep the address in data-src; fall back to src.
        image_element = article.find('img')
        image_url = (
            image_element.get('data-src') or image_element.get('src')
        ) if image_element else None

        publish = article.find('span', class_='listPreview-date')
        publish_date = publish.text.strip() if publish else ''

        # The description is the text of the first nested <div>.
        description_element = article.find('div')
        description = description_element.text.strip() if description_element else ''

        author_element = article.find('a', class_='Contributors-ContributorName')
        author = author_element.text.strip() if author_element else ''

        logo = 'https://eu-images.contentstack.com/v3/assets/blta47798dd33129a0c/blt85a4eec1580c5a65/66fae0a9b162c5985da36781/Logo_-_Connecting_Africa.svg?width=476&auto=webp&quality=80&disable=upscale'
        logo1 = 'https://flagpedia.net/data/flags/h80/za.webp'
        # NOTE(review): website and website_url do not match the scraped
        # domain (connectingafrica.com); confirm before changing them.
        website_url = 'https://www.tanzaniatech.one/news/'
        website = 'Space In Africa'
        category = determine_category(title, description)

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'link': link,
            'image_url': image_url,
            'logo': logo,
            'logo1': logo1,
            'author': author,
            'website': website,
            'website_url': website_url,
            'category': category,
        })

    return articles

def fetch_news_techbuild():
    """Scrape the first article teaser from the Techibytes home page.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned when
        the request fails or the teaser carries no link.
    """
    url = 'https://techibytes.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # Without an explicit timeout requests waits indefinitely, so the
        # ``except Timeout`` branch below could never fire.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        for article in soup.find_all('div', class_='news-list-wrap')[:1]:
            # A teaser without a link is skipped instead of raising a
            # TypeError/KeyError that none of the handlers below catch.
            link_element = article.find('a', href=True)
            if link_element is None:
                continue
            link = urljoin(url, link_element['href'])

            badge_element = article.find('li', class_='cat-item')
            badge = badge_element.text.strip() if badge_element else ''

            title_element = article.find('h2', class_='post-title')
            title = title_element.text.strip() if title_element else ''

            publish = article.find('time', class_='entry-date published')
            publish_date = publish.text.strip() if publish else ''

            # Prefer <img src>; fall back to a <span data-img-url="...">.
            image_element = article.find('img')
            if image_element and image_element.get('src'):
                image_url = image_element['src']
            else:
                span_element = article.find('span', attrs={'data-img-url': True})
                image_url = span_element['data-img-url'] if span_element else None

            description_element = article.find('div', class_='post-excerpt')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span', class_='author vcard')
            author = author_element.text.strip() if author_element else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://i1.feedspot.com/8553793.jpg?t=1752126422',
                'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
                'author': author,
                'website': 'Techibytes',
                'website_url': 'https://techibytes.com/',
                'category': determine_category(title, description),
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must come first.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError is unrelated to requests' exception of
        # the same name and would never match here.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []


def fetch_news_pocit():
    """Scrape the first article header from People of Color in Tech.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``image_url``,
        ``link``, ``logo``, ``logo1``, ``website``, ``author``,
        ``website_url``, ``category``, ``priority`` and ``section``.
        An empty list is returned when the request fails.
    """
    url = 'https://peopleofcolorintech.com/category/articles/'
    base_url = "https://peopleofcolorintech.com"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except RequestException as e:
        # Match the sibling fetchers: report the failure and yield nothing.
        print(f"Error: An error occurred: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    website = 'POCIT'
    website_url = 'https://peopleofcolorintech.com/'
    logo = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSffzlR1LaqFli4ufyjNCZgdWYEHfN8Z6QfUg&s'
    logo1 = 'https://flagpedia.net/data/flags/h80/us.webp'

    articles = []
    for article in soup.find_all('div', class_='content-header-single')[:1]:
        # Badge and author are both read from the first <span> of the header.
        span_element = article.find('span')
        span_text = span_element.text.strip() if span_element else ''
        badge = span_text
        author = span_text or website

        title_element = article.find('h2', class_='content-title')
        title = title_element.text.strip() if title_element else ''

        entry_div = article.find('div', class_='entry')
        p_tag = entry_div.find('p') if entry_div else None
        description = p_tag.get_text(strip=True) if p_tag else ''

        image_element = article.find('img')
        image_url = image_element.get('src') if image_element else None

        # The link lives on the <a> inside the title heading. Resolve it
        # against the site root; the previous code read ``website_url``
        # before it was assigned, raising UnboundLocalError on relative links.
        link = None
        a_tag = title_element.find('a') if title_element else None
        href = a_tag.get('href') if a_tag else None
        if href:
            link = urljoin(base_url, href)

        publish = article.find('p', class_='text-label-mobile uppercase lg:text-label-desktop')
        publish_date = publish.text.strip() if publish else 'Recently Added'

        articles.append({
            'title': title,
            'badge': badge,
            'description': description,
            'publish_date': publish_date,
            'image_url': image_url,
            'link': link,
            'logo': logo,
            'logo1': logo1,
            'website': website,
            'author': author,
            'website_url': website_url,
            'category': determine_category(title, description),
            'priority': 16,
            'section': 'bottom',
        })

    return articles

def fetch_news_thisweek():
    """Scrape the first Africa-tagged card from This Week in Fintech.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url``, ``category``, ``priority`` and ``section``.
        An empty list is returned when the request fails or the card
        carries no link.
    """
    url = 'https://www.thisweekinfintech.com/tag/africa/'
    base_url = 'https://www.thisweekinfintech.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # An explicit timeout is what makes the ``except Timeout`` branch reachable.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        for article in soup.find_all('article')[:1]:
            # Skip cards without a link rather than raising an uncaught TypeError.
            link_element = article.find('a', class_='gh-card-link', href=True)
            if link_element is None:
                continue
            # urljoin avoids the "//" produced by concatenating a base that
            # ends in "/" with a path that starts with "/".
            link = urljoin(base_url, link_element['href'])

            badge_element = article.find('a', class_='td-post-category')
            badge = badge_element.text.strip() if badge_element else ''

            title_element = article.find('h3', class_='gh-card-title')
            title = title_element.text.strip() if title_element else ''

            # Prefer <img src>; fall back to <span data-img-url>. The result is
            # made absolute (the earlier absolutised value used to be
            # overwritten by a second raw lookup).
            image_element = article.find('img')
            if image_element and image_element.get('src'):
                raw_image = image_element['src'].strip()
            else:
                span_element = article.find('span', attrs={'data-img-url': True})
                raw_image = span_element['data-img-url'] if span_element else None
            image_url = urljoin(base_url, raw_image) if raw_image else None

            publish = article.find('time', class_='gh-card-date')
            publish_date = publish.text.strip() if publish else ''

            description_element = article.find('div', class_='gh-card-excerpt')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span', class_='gh-card-author')
            author = author_element.text.strip() if author_element else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://www.thisweekinfintech.com/content/images/2023/04/TWIF-logo.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/za.webp',
                'author': author,
                # Previously attributed to "It News Africa" (copy-paste from
                # another fetcher); the source scraped here is TWIF.
                'website': 'This Week in Fintech',
                'website_url': base_url,
                'category': determine_category(title, description),
                'priority': '34',
                'section': 'side',
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must come first.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError never matches requests' connection errors.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []


def fetch_news_mari():
    """Scrape the first topic card from the Mariblock home page.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned when
        the request fails or the card carries no link.
    """
    url = 'https://www.mariblock.com/'
    base_url = 'https://www.mariblock.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # An explicit timeout is what makes the ``except Timeout`` branch reachable.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        for article in soup.find_all('div', class_='gh-topic-content')[:1]:
            # Skip cards without a link rather than raising an uncaught TypeError.
            link_element = article.find('a', class_='gh-card-link', href=True)
            if link_element is None:
                continue
            # urljoin avoids the "//" that plain concatenation produced.
            link = urljoin(base_url, link_element['href'])

            badge_element = article.find('a', class_='td-post-category')
            badge = badge_element.text.strip() if badge_element else ''

            # Look inside the card first so the title belongs to this article;
            # keep the page-wide lookup as a fallback.
            title_element = (article.find('h3', class_='gh-card-title')
                             or soup.find('h3', class_='gh-card-title'))
            title = title_element.get_text(strip=True) if title_element else ''

            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            publish = article.find('time', class_='gh-card-date')
            publish_date = publish.text.strip() if publish else ''

            # The description is the text of the first <div> inside the card.
            description_element = article.find('div')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span', class_='gh-card-author')
            author = author_element.text.strip() if author_element else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://www.mariblock.com/content/images/2022/11/mariblock-logo-1.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
                'author': author,
                'website': 'Mariblock',
                'website_url': 'https://www.mariblock.com/',
                'category': determine_category(title, description),
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must come first.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError never matches requests' connection errors.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []

def fetch_news_edge():
    """Scrape the first ``<article>`` from the IT Edge News news category.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned on
        any error or when the article carries no link.
    """
    url = 'https://www.itedgenews.africa/category/news/'
    base_url = 'https://www.itedgenews.africa/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []

        for article in soup.find_all('article')[:1]:
            link_element = article.find('a', href=True)
            if link_element is None:
                continue
            # urljoin avoids the "//" that plain concatenation produced.
            link = urljoin(base_url, link_element['href'])

            # Look inside the article first so the title belongs to it;
            # keep the page-wide lookup as a fallback.
            title_element = (article.find('h3', class_='post__title')
                             or soup.find('h3', class_='post__title'))
            title = title_element.get_text(strip=True) if title_element else ''

            badge_element = article.find('a', class_='cat-38')
            badge = badge_element.text.strip() if badge_element else ''

            # A missing image no longer discards the whole article.
            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            publish = article.find('time', class_='time published')
            publish_date = publish.text.strip() if publish else ''

            description_elem = article.find('div', class_='excerpt')
            description = description_elem.text.strip() if description_elem else ''

            author_elem = article.find('a', class_='entry-author__name')
            author = author_elem.text.strip() if author_elem else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://www.itedgenews.africa/wp-content/uploads/2022/11/logo_africa2.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/ng.webp',
                'author': author,
                'website': 'Itedgenews',
                'website_url': 'https://www.itedgenews.africa/',
                'category': determine_category(title, description),
            })

        return articles

    except Exception as e:
        # Best-effort scraper: callers rely on this function never raising.
        print(f"Error: {e}")
        return []

def fetch_news_micro():
    """Scrape the first ``<article>`` from Microsoft's Africa news page.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned when
        the request fails or the article carries no link.
    """
    url = 'https://news.microsoft.com/source/emea/region/africa/'
    base_url = 'https://news.microsoft.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # An explicit timeout is what makes the ``except Timeout`` branch reachable.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        for article in soup.find_all('article')[:1]:
            # The 'gh-card-link' class was carried over from another fetcher;
            # when it is absent, use the first anchor that has an href instead
            # of raising an uncaught TypeError.
            link_element = (article.find('a', class_='gh-card-link', href=True)
                            or article.find('a', href=True))
            if link_element is None:
                continue
            # urljoin avoids the "//" that plain concatenation produced.
            link = urljoin(base_url, link_element['href'])

            badge_element = article.find('li', class_='post-tags__item')
            badge = badge_element.text.strip() if badge_element else ''

            # Look inside the article first so the title belongs to it;
            # keep the page-wide first <h2> as a fallback.
            title_element = article.find('h2') or soup.find('h2')
            title = title_element.get_text(strip=True) if title_element else ''

            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            publish = article.find('time', class_='published')
            publish_date = publish.text.strip() if publish else ''

            description_element = article.find('span', class_='p')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span', class_='gh-card-author')
            author = author_element.text.strip() if author_element else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://uhf.microsoft.com/images/microsoft/RE1Mu3b.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/us.webp',
                'author': author,
                'website': 'Microsoft Africa',
                'website_url': 'https://news.microsoft.com/source/emea/region/africa/',
                'category': determine_category(title, description),
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must come first.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError never matches requests' connection errors.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []

def fetch_news_mito():
    """Scrape the first Africa-topic listing block from MIT News.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned when
        the request fails or the block carries no article link.
    """
    url = 'https://news.mit.edu/topic/africa'
    base_url = 'https://news.mit.edu'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # An explicit timeout is what makes the ``except Timeout`` branch reachable.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        articles = []
        for article in soup.find_all('div', class_='page-term--views--list')[:1]:
            # Skip blocks without a link rather than raising an uncaught TypeError.
            link_element = article.find(
                'a', class_='term-page--news-article--item--title--link', href=True)
            if link_element is None:
                continue
            link = urljoin(base_url, link_element['href'])

            badge_element = article.find('li', class_='post-tags__item')
            badge = badge_element.text.strip() if badge_element else ''

            # Look inside the block first so the title belongs to it;
            # keep the page-wide lookup as a fallback.
            title_element = (
                article.find('h3', class_='term-page--news-article--item--title')
                or soup.find('h3', class_='term-page--news-article--item--title'))
            title = title_element.get_text(strip=True) if title_element else ''

            # Lazy-loaded images keep the real URL in data-src.
            image_url = None
            image_element = article.find('img')
            if image_element:
                raw_src = image_element.get('data-src') or image_element.get('src')
                if raw_src:
                    image_url = urljoin(base_url, raw_src)

            publish = article.find(
                'p', class_='term-page--news-article--item--publication-date')
            publish_date = publish.text.strip() if publish else ''

            description_element = article.find(
                'p', class_='term-page--news-article--item--dek')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span', class_='gh-card-author')
            author = author_element.text.strip() if author_element else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTNi7UyMRpPBOXlE72HD3Gait7iI0dnDSdPlA&s',
                'logo1': 'https://flagpedia.net/data/flags/h80/us.webp',
                'author': author,
                'website': 'Massachusetts Institute of Technology',
                # Previously pointed at news.microsoft.com (copy-paste from
                # the Microsoft fetcher).
                'website_url': 'https://news.mit.edu/',
                'category': determine_category(title, description),
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # SSLError subclasses requests' ConnectionError, so it must come first.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError never matches requests' connection errors.
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []


def fetch_news_techish():
    """Scrape the first post item from the Tech-ish startups category.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned on
        any error or when the post item carries no link.
    """
    url = 'https://tech-ish.com/category/startups/'
    base_url = 'https://tech-ish.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []

        for article in soup.find_all('li', class_='post-item')[:1]:
            link_element = article.find('a', href=True)
            if link_element is None:
                continue
            # urljoin avoids the "//" that plain concatenation produced.
            link = urljoin(base_url, link_element['href'])

            # Look inside the post item first so the title belongs to it;
            # keep the page-wide lookup as a fallback.
            title_element = (article.find('h2', class_='post-title')
                             or soup.find('h2', class_='post-title'))
            title = title_element.get_text(strip=True) if title_element else ''

            badge_element = article.find('span')
            badge = badge_element.text.strip() if badge_element else ''

            # A missing image no longer discards the whole article.
            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            # The class string previously ended with a space, which can never
            # equal a parsed class attribute, so the date was always empty.
            publish = article.find('span', class_='date meta-item')
            publish_date = publish.text.strip() if publish else ''

            description_elem = article.find('p', class_='post-excerpt')
            description = description_elem.text.strip() if description_elem else ''

            author_elem = article.find('span', class_='meta-author')
            author = author_elem.text.strip() if author_elem else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://tech-ish.com/wp-content/uploads/2024/04/Coloured-LOGO.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/ke.webp',
                'author': author,
                'website': 'Tech-ish',
                'website_url': 'https://tech-ish.com/',
                'category': determine_category(title, description),
            })

        return articles

    except Exception as e:
        # Best-effort scraper: callers rely on this function never raising.
        print(f"Error: {e}")
        return []


def fetch_news_urban():
    """Scrape the first ``<article>`` from the UrbanGeekz news page.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned on
        any error or when the article carries no link.
    """
    url = 'https://urbangeekz.com/news/'
    base_url = 'https://urbangeekz.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []

        for article in soup.find_all('article')[:1]:
            link_element = article.find('a', href=True)
            if link_element is None:
                continue
            # urljoin avoids the "//" that plain concatenation produced.
            link = urljoin(base_url, link_element['href'])

            # Look inside the article first so the title belongs to it;
            # keep the page-wide lookup as a fallback.
            title_element = (article.find('div', class_='post-title')
                             or soup.find('div', class_='post-title'))
            title = title_element.get_text(strip=True) if title_element else ''

            badge_element = article.find('li')
            badge = badge_element.text.strip() if badge_element else ''

            # A missing image no longer discards the whole article.
            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            publish = article.find('span', class_='post-date updated')
            publish_date = publish.text.strip() if publish else ''

            description_elem = article.find('div', class_='post-excerpt')
            description = description_elem.text.strip() if description_elem else ''

            author_elem = article.find('span', class_='fn')
            author = author_elem.text.strip() if author_elem else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://urbangeekz.com/wp-content/uploads/2015/11/logo_v5-slogan-retina.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/us.webp',
                'author': author,
                # Previously labelled 'Tech-ish' (copy-paste from another
                # fetcher), which misattributed UrbanGeekz articles.
                'website': 'UrbanGeekz',
                'website_url': 'https://urbangeekz.com/',
                'category': determine_category(title, description),
            })

        return articles

    except Exception as e:
        # Best-effort scraper: callers rely on this function never raising.
        print(f"Error: {e}")
        return []


def fetch_news_urbano():
    """Scrape the first content block from the Birrmetrics news page.

    Returns:
        list[dict]: Zero or one article dicts with the keys ``title``,
        ``badge``, ``description``, ``publish_date``, ``link``,
        ``image_url``, ``logo``, ``logo1``, ``author``, ``website``,
        ``website_url`` and ``category``. An empty list is returned on
        any error or when the block carries no link.
    """
    url = 'https://birrmetrics.com/news/'
    base_url = 'https://birrmetrics.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []

        for article in soup.find_all('div', class_='qodef-e-content')[:1]:
            link_element = article.find('a', href=True)
            if link_element is None:
                continue
            link = urljoin(base_url, link_element['href'])

            # Look inside the block first so the title belongs to it;
            # keep the page-wide lookup as a fallback.
            title_element = (article.find('h5', class_='qodef-e-title entry-title')
                             or soup.find('h5', class_='qodef-e-title entry-title'))
            title = title_element.get_text(strip=True) if title_element else ''

            badge_element = article.find('li')
            badge = badge_element.text.strip() if badge_element else ''

            # A missing image no longer discards the whole article.
            image_element = article.find('img')
            raw_image = image_element.get('src') if image_element else None
            image_url = urljoin(base_url, raw_image.strip()) if raw_image else None

            # NOTE(review): 'font-reqular' looks like a typo for 'font-regular';
            # left as-is because it may be the class the site actually uses.
            publish = article.find('span', class_='font-reqular')
            publish_date = publish.text.strip() if publish else ''

            description_elem = article.find('p', class_='qodef-e-excerpt')
            description = description_elem.text.strip() if description_elem else ''

            author_elem = article.find('div', class_='flex')
            author = author_elem.text.strip() if author_elem else ''

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://birrmetrics.com/wp-content/uploads/2021/06/logo-full-150x80.png',
                'logo1': 'https://flagpedia.net/data/flags/h80/et.webp',
                'author': author,
                'website': 'Birrmetrics',
                # Previously pointed at urbangeekz.com (copy-paste from the
                # UrbanGeekz fetcher).
                'website_url': 'https://birrmetrics.com/',
                'category': determine_category(title, description),
            })

        return articles

    except Exception as e:
        # Best-effort scraper: callers rely on this function never raising.
        print(f"Error: {e}")
        return []
    

def fetch_news_tz():
    """Scrape the lead story from the Tech Financials news listing.

    Downloads ``https://techfinancials.co.za/news/``, takes the first post
    grid found on the page and builds one article dict from it.

    Returns:
        list[dict]: At most one dict with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``,
        ``logo``, ``logo1``, ``author``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``.  The list is empty when
        the request fails or the expected markup (post grid / link) is
        missing.
    """
    url = 'https://techfinancials.co.za/news/'
    base_url = 'https://techfinancials.co.za/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    title_class = 'is-title post-title'
    try:
        # An explicit timeout keeps a stalled server from blocking the
        # scraper forever; raise_for_status() stops us parsing error pages.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []
        article_collection = soup.find_all(
            'div', class_='loop loop-grid loop-grid-base grid grid-2 md:grid-2 xs:grid-1')
        for article in article_collection[:1]:
            # Without a link the entry is unusable, so skip it rather than
            # raising on a missing <a>/href.
            anchor = article.find('a', href=True)
            if anchor is None:
                continue
            # urljoin resolves relative and protocol-relative hrefs against
            # the site root and leaves absolute URLs untouched.
            link = urljoin(base_url, anchor['href'].strip())

            badge_element = article.find(
                'div', class_='post-meta-items meta-above')
            badge = badge_element.text.strip() if badge_element else ''

            # Prefer the heading inside this post container so the title
            # belongs to the same story as the link; fall back to the first
            # matching heading on the page.
            title_element = (article.find('h2', class_=title_class)
                             or soup.find('h2', class_=title_class))
            title = title_element.get_text(strip=True) if title_element else ''

            image_url = None
            image_element = article.find("img")
            if image_element:
                # Lazy-loaded images keep the real URL in data-src.
                raw_src = image_element.get("data-src") or image_element.get("src")
                if raw_src:
                    image_url = urljoin(base_url, raw_src.strip())

            publish = article.find('span', class_='date meta-item')
            publish_date = publish.text.strip() if publish else ''

            description_element = article.find('p')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span')
            author = author_element.text.strip() if author_element else 'Tech Financials'

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': 'https://b2726120.smushcdn.com/2726120/wp-content/uploads/2025/03/techfinancials-2025-2-03.png?lossy=2&strip=1&webp=1&size=313x0',
                'logo1': 'https://flagpedia.net/data/flags/w1160/za.webp',
                'author': author,
                'website': 'Tech Financials',
                'website_url': 'https://techfinancials.co.za/',
                'category': determine_category(title, description),
                'priority': '24',
                'section': 'side'
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # Must precede the ConnectionError clause: requests' SSLError is a
        # subclass of requests' ConnectionError.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except (requests.exceptions.ConnectionError, ConnectionError):
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []
def fetch_news_tz0():
    """Scrape the lead story from the TechAfrica News technology category.

    Downloads ``https://techafricanews.com/category/technology/``, takes the
    first post grid found on the page and builds one article dict from it.

    NOTE(review): the CSS selectors are the same ones ``fetch_news_tz`` uses
    for a different site -- confirm they match this site's markup.

    Returns:
        list[dict]: At most one dict with the keys ``title``, ``badge``,
        ``description``, ``publish_date``, ``link``, ``image_url``,
        ``logo``, ``logo1``, ``author``, ``website``, ``website_url``,
        ``category``, ``priority`` and ``section``.  The list is empty when
        the request fails or the expected markup (post grid / link) is
        missing.
    """
    url = 'https://techafricanews.com/category/technology/'
    base_url = 'https://techafricanews.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    title_class = 'is-title post-title'
    try:
        # Bound the request so a stalled server cannot hang the scraper, and
        # treat HTTP error statuses as failures instead of parsing them.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = []
        article_collection = soup.find_all(
            'div', class_='loop loop-grid loop-grid-base grid grid-2 md:grid-2 xs:grid-1')
        for article in article_collection[:1]:
            # Skip entries without a usable link instead of raising.
            anchor = article.find('a', href=True)
            if anchor is None:
                continue
            # Resolve relative hrefs against this site's own root.
            link = urljoin(base_url, anchor['href'].strip())

            badge_element = article.find(
                'div', class_='post-meta-items meta-above')
            badge = badge_element.text.strip() if badge_element else ''

            # Look for the heading inside the post container first so the
            # title matches the link; fall back to a page-wide search.
            title_element = (article.find('h2', class_=title_class)
                             or soup.find('h2', class_=title_class))
            title = title_element.get_text(strip=True) if title_element else ''

            image_url = None
            image_element = article.find("img")
            if image_element:
                # Lazy-loaded images keep the real URL in data-src.
                raw_src = image_element.get("data-src") or image_element.get("src")
                if raw_src:
                    image_url = urljoin(base_url, raw_src.strip())

            publish = article.find('span', class_='date meta-item')
            publish_date = publish.text.strip() if publish else ''

            description_element = article.find('p')
            description = description_element.text.strip() if description_element else ''

            author_element = article.find('span')
            author = author_element.text.strip() if author_element else ''

            # NOTE(review): logo and flag were copied from the Tech
            # Financials scraper; replace them with this site's own assets
            # once the correct URLs are known.
            logo = 'https://b2726120.smushcdn.com/2726120/wp-content/uploads/2025/03/techfinancials-2025-2-03.png?lossy=2&strip=1&webp=1&size=313x0'
            logo1 = 'https://flagpedia.net/data/flags/w1160/za.webp'

            articles.append({
                'title': title,
                'badge': badge,
                'description': description,
                'publish_date': publish_date,
                'link': link,
                'image_url': image_url,
                'logo': logo,
                'logo1': logo1,
                'author': author,
                # Label the source as the site that is actually scraped.
                'website': 'TechAfrica News',
                'website_url': base_url,
                'category': determine_category(title, description),
                'priority': '24',
                'section': 'side'
            })

        return articles
    except Timeout:
        print(f"Error: The request timed out while trying to access {url}.")
    except SSLError:
        # Must precede the ConnectionError clause: requests' SSLError is a
        # subclass of requests' ConnectionError.
        print(f"Error: SSL certificate error occurred while trying to access {url}.")
    except (requests.exceptions.ConnectionError, ConnectionError):
        print(f"Error: Connection error occurred while trying to access {url}.")
    except RequestException as e:
        print(f"Error: An error occurred: {e}")

    return []


def save_articles_to_db(articles):
    """Insert scraped articles whose link is not stored yet.

    NOTE(review): a second ``save_articles_to_db`` is defined further down
    in this module and rebinds the name at import time, so this
    duplicate-skipping variant is not the one callers currently reach.

    Args:
        articles: Iterable of article dicts as produced by the
            ``fetch_news_*`` scrapers, each extended with a ``section`` key.
            ``None`` or an empty collection is reported and ignored.

    Raises:
        KeyError: If an article lacks one of the required keys.
        Exception: Any error raised by the commit is re-raised after the
            session has been rolled back.
    """
    if not articles:
        print("❌ No articles found")
        return

    # Links already queued in this batch; keeps the duplicate check correct
    # even when the session does not autoflush pending rows.
    queued_links = set()

    for article in articles:
        link = article['link']
        if link in queued_links:
            continue
        if ScrapedArticle.query.filter_by(link=link).first():
            continue  # skip duplicates
        queued_links.add(link)

        # The previous per-article categorize_article() call was dropped:
        # its result was never stored, so it only added latency.
        category = article.get('category') or determine_category(
            article['title'], article['description']
        )

        db.session.add(ScrapedArticle(
            title=article['title'],
            description=article['description'],
            publish_date=article['publish_date'],
            link=link,
            author=article['author'],
            logo=article['logo'],
            logo1=article['logo1'],
            website=article['website'],
            image_url=article['image_url'],
            badge=article.get('badge'),
            website_url=article.get('website_url'),
            category=category,
            section=article['section'],
        ))

    try:
        db.session.commit()
    except Exception:
        # Leave the session usable for the next request / scheduler run.
        db.session.rollback()
        raise
    print("✅ Articles saved")

# def scheduled_scrape():
#     with app.app_context():
      
#         articles_disrupt = [dict(article, section="top") for article in fetch_news_disrupt()]
#         articles_voa = [dict(article, section="top") for article in fetch_news_voa_central()]
#         articles_central = [dict(article, section="top") for article in fetch_news_central()]
#         articles_business = [dict(article, section="top") for article in fetch_news_techbusiness()]
#         articles_techpoint = [dict(article, section="top") for article in fetch_news_techpoint()]
#         articles_cio = [dict(article, section="top") for article in fetch_news_cio()]
#         articles_techarena = [dict(article, section="top") for article in fetch_news_techarena()]
#         articles_teche = [dict(article, section="top") for article in fetch_news_teche()]
#         articles_village = [dict(article, section="top") for article in fetch_news_village()]


#         articles_labali = [dict(article, section="side") for article in fetch_news_techlabari()]
#         articles_pc = [dict(article, section="side") for article in fetch_news_pctechmag()]
#         articles_construction = [dict(article, section="top") for article in fetch_news_construction()]
#         articles_afri = [dict(article, section="side") for article in fetch_news_afrimirror()]
#         articles_newtimes = [dict(article, section="side") for article in fetch_news_newtimes()]
#         articles_shega = [dict(article, section="side") for article in fetch_news_shega()]
#         articles_appy = [dict(article, section="side") for article in fetch_news_app()]
#         articles_mari = [dict(article, section="side") for article in fetch_news_mari()]

        
#         articles_techloy = [dict(article, section="bottom") for article in fetch_news_techloy()]
#         articles_coin = [dict(article, section="bottom") for article in fetch_news_coinweez()]
#         articles_datacenter = [dict(article, section="bottom") for article in fetch_news_datacenter()]
#         articles_view = [dict(article, section="bottom") for article in fetch_news_techview()]
#         articles_angola = [dict(article, section="bottom") for article in fetch_news_angola()]


#         articles_crunch = [dict(article, section="bottom") for article in fetch_news_techcrunch()]
#         articles_techish = [dict(article, section="bottom") for article in fetch_news_techish()]
#         articles_egypt = [dict(article, section="bottom") for article in fetch_news_egypt()]
#         articles_fins = [dict(article, section="bottom") for article in fetch_news_fins()]
#         articles_build = [dict(article, section="bottom") for article in fetch_news_techbuild()]

#         articles_poc = [dict(article, section="way") for article in fetch_news_pocit()]
#         articles_edge = [dict(article, section="top") for article in fetch_news_edge()]
#         articles_micro = [dict(article, section="ways") for article in fetch_news_micro()]
#         articles_mito = [dict(article, section="wayo") for article in fetch_news_mito()]


#         all_articles = (
#             articles_disrupt + articles_voa + articles_central +
#             articles_business + articles_techpoint + articles_cio +
#             articles_techarena + articles_teche +
#             articles_village +
#             articles_labali + articles_newtimes + articles_pc +
#             articles_crunch + articles_fins + articles_techloy +
#             articles_construction + articles_afri + articles_egypt +
#             articles_datacenter + articles_shega +articles_newtimes+
#             articles_view+ articles_angola+
#             articles_coin+articles_build+
#             articles_poc+articles_appy+articles_mari+articles_edge+
#             articles_micro+ articles_mito+articles_techish
#         )

#         save_articles_to_db(all_articles)
      
#         print("✅ Scheduled scraping complete")


# @app.route('/scrape')
# def scrape_and_populate():
#     # ---- TOP articles ----
#     articles_disrupt = [dict(article, section="top") for article in fetch_news_disrupt()]
#     articles_voa = [dict(article, section="top") for article in fetch_news_voa_central()]
#     articles_central = [dict(article, section="top") for article in fetch_news_central()]
#     articles_business = [dict(article, section="top") for article in fetch_news_techbusiness()]
#     articles_techpoint = [dict(article, section="top") for article in fetch_news_techpoint()]
#     articles_cio = [dict(article, section="top") for article in fetch_news_cio()]
#     articles_techarena = [dict(article, section="top") for article in fetch_news_techarena()]
#     articles_construction = [dict(article, section="top") for article in fetch_news_construction()]
#     articles_teche = [dict(article, section="top") for article in fetch_news_teche()]
#     articles_village = [dict(article, section="top") for article in fetch_news_village()]


#     articles_labali = [dict(article, section="side") for article in fetch_news_techlabari()]
#     articles_pc = [dict(article, section="side") for article in fetch_news_pctechmag()]
#     articles_newtimes = [dict(article, section="side") for article in fetch_news_newtimes()]
#     articles_afri = [dict(article, section="side") for article in fetch_news_afrimirror()]
#     articles_shega = [dict(article, section="side") for article in fetch_news_shega()]
#     articles_appy = [dict(article, section="side") for article in fetch_news_app()]
#     articles_mari = [dict(article, section="side") for article in fetch_news_mari()]

  
#     articles_techloy = [dict(article, section="bottom") for article in fetch_news_techloy()]
#     articles_coin = [dict(article, section="bottom") for article in fetch_news_coinweez()]
#     articles_datacenter = [dict(article, section="bottom") for article in fetch_news_datacenter()]
#     articles_view = [dict(article, section="bottom") for article in fetch_news_techview()]
#     articles_angola = [dict(article, section="bottom") for article in fetch_news_angola()]


#     articles_crunch = [dict(article, section="bottom") for article in fetch_news_techcrunch()]
#     articles_build = [dict(article, section="bottom") for article in fetch_news_techbuild()]
#     articles_egypt = [dict(article, section="bottom") for article in fetch_news_egypt()]
#     articles_techish = [dict(article, section="bottom") for article in fetch_news_techish()]
#     articles_fins = [dict(article, section="bottom") for article in fetch_news_fins()]


#     articles_poc = [dict(article, section="way") for article in fetch_news_pocit()]
#     articles_edge = [dict(article, section="top") for article in fetch_news_edge()]
#     articles_micro = [dict(article, section="ways") for article in fetch_news_micro()]
#     articles_mito = [dict(article, section="wayo") for article in fetch_news_mito()]


#     all_articles = (
#         articles_disrupt + articles_voa + articles_central +
#         articles_business + articles_techpoint + articles_cio +
#         articles_techarena + articles_teche +
#         articles_village +
#         articles_labali  + articles_pc +
#         articles_crunch + articles_fins + articles_techloy +
#         articles_construction + articles_afri + articles_egypt +
#         articles_datacenter + articles_shega +  articles_newtimes+
#         articles_view+ articles_angola+
#         articles_coin+articles_build+
#         articles_poc+articles_appy+articles_mari+articles_edge+
#         articles_micro+ articles_mito+articles_techish
#     )

#     save_articles_to_db(all_articles)
#     return "Data scraped and inserted into the database fast!"


# @app.route('/')
# @app.route('/all')
# def index():
#     category = request.args.get('category')

   
#     subquery = db.session.query(
#         ScrapedArticle.website,
#         func.max(ScrapedArticle.id).label('latest_id')
#     )

#     if category:
#         subquery = subquery.filter(ScrapedArticle.category == category)

#     subquery = subquery.group_by(ScrapedArticle.website).subquery()

  
#     articles = db.session.query(ScrapedArticle).join(
#         subquery,
#         and_(
#             ScrapedArticle.website == subquery.c.website,
#             ScrapedArticle.id == subquery.c.latest_id
#         )
#     ).order_by(ScrapedArticle.publish_date.desc()).all()

#     return render_template('index.html', articles=articles, category=category)


# def scheduled_scrape():
#     with app.app_context():
#         print("🚀 Running scheduled scrape...")
#         articles = scrape_and_populate()
#         save_articles_to_db(articles)


# if not app.debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
#     scheduler = BackgroundScheduler()
#     scheduler.add_job(
#         func=scheduled_scrape,
#         trigger=IntervalTrigger(minutes=30),
#         id='scrape_job',
#         replace_existing=True
#     )
#     scheduler.start()
# if __name__ == '__main__':
#     app.run(debug=True)

def save_articles_to_db(articles):
    """Replace every stored article with a freshly scraped batch.

    The old rows are deleted and the new ones inserted inside a single
    transaction, so a failure while building or inserting the new rows
    rolls back to the previous table contents instead of leaving the table
    empty.

    Args:
        articles: Iterable of article dicts as produced by the
            ``fetch_news_*`` scrapers, each extended with a ``section`` key.
            ``None`` or an empty collection leaves the database untouched.

    Raises:
        KeyError: If an article lacks one of the required keys.
        Exception: Any database error is re-raised after a rollback.
    """
    if not articles:  # Nothing scraped: keep what is already stored
        print("No articles found. Database was not updated.")
        return

    try:
        # Bulk delete is deliberately NOT committed on its own; it becomes
        # permanent only together with the inserts below.
        db.session.query(ScrapedArticle).delete()
        for article in articles:
            # Use the scraper's category when present, otherwise derive it
            # from the title and description.
            category = article.get('category') or determine_category(
                article['title'], article['description']
            )
            db.session.add(ScrapedArticle(
                title=article['title'],
                description=article['description'],
                publish_date=article['publish_date'],
                link=article['link'],
                author=article['author'],
                logo=article['logo'],
                logo1=article['logo1'],
                website=article['website'],
                image_url=article['image_url'],
                badge=article.get('badge', None),
                website_url=article.get('website_url', None),
                category=category,
                section=article['section']
            ))
        db.session.commit()
    except Exception:
        # Restore the previous rows and keep the session usable.
        db.session.rollback()
        raise
    print("Articles saved to the database.")

# Periodic scraping task
# def scheduled_scrape():
#     with app.app_context():
     
#         articles_disrupt = [dict(article, section="top") for article in fetch_news_disrupt()]
#         articles_voa = [dict(article, section="top") for article in fetch_news_voa_central()]
#         articles_central = [dict(article, section="top") for article in fetch_news_central()]
#         articles_business = [dict(article, section="top") for article in fetch_news_techbusiness()]
#         articles_techpoint = [dict(article, section="top") for article in fetch_news_techpoint()]
#         articles_cio = [dict(article, section="top") for article in fetch_news_cio()]
#         articles_techarena = [dict(article, section="top") for article in fetch_news_techarena()]
#         articles_namibia = [dict(article, section="top") for article in fetch_news_namibia()]
#         articles_village = [dict(article, section="top") for article in fetch_news_village()]


#         articles_labali = [dict(article, section="side") for article in fetch_news_techlabari()]
#         articles_pc = [dict(article, section="side") for article in fetch_news_pctechmag()]
#         articles_construction = [dict(article, section="top") for article in fetch_news_construction()]
#         articles_afri = [dict(article, section="side") for article in fetch_news_afrimirror()]
#         articles_newtimes = [dict(article, section="side") for article in fetch_news_newtimes()]
       
#         articles_appy = [dict(article, section="side") for article in fetch_news_app()]
#         articles_mari = [dict(article, section="side") for article in fetch_news_mari()]
#         articles_crunch = [dict(article, section="side") for article in fetch_news_techcrunch()]
#         articles_tz = [dict(article, section="side") for article in fetch_news_tz()]
       
#         articles_techloy = [dict(article, section="bottom") for article in fetch_news_techloy()]
#         articles_coin = [dict(article, section="bottom") for article in fetch_news_coinweez()]
#         articles_datacenter = [dict(article, section="bottom") for article in fetch_news_datacenter()]
#         articles_view = [dict(article, section="bottom") for article in fetch_news_techview()]
#         articles_angola = [dict(article, section="bottom") for article in fetch_news_angola()]
        
#         articles_techish = [dict(article, section="bottom") for article in fetch_news_techish()]
#         articles_egypt = [dict(article, section="bottom") for article in fetch_news_egypt()]
#         articles_fins = [dict(article, section="bottom") for article in fetch_news_fins()]
#         articles_build = [dict(article, section="bottom") for article in fetch_news_techbuild()]

#         articles_poc = [dict(article, section="way") for article in fetch_news_pocit()]
#         articles_edge = [dict(article, section="top") for article in fetch_news_edge()]
#         articles_micro = [dict(article, section="ways") for article in fetch_news_micro()]
#         articles_mito = [dict(article, section="wayo") for article in fetch_news_mito()]


#         all_articles = (
#             articles_disrupt + articles_voa + articles_central +
#             articles_business + articles_techpoint + articles_cio +
#             articles_techarena + articles_namibia+
#             articles_village +
#             articles_labali + articles_newtimes + articles_pc +
#             articles_crunch + articles_fins + articles_techloy +
#             articles_construction + articles_afri + articles_egypt +
#             articles_datacenter +articles_newtimes+
#             articles_view+ articles_angola+
#             articles_coin+articles_build+
#             articles_poc+articles_appy+articles_mari+articles_edge+
#             articles_micro+ articles_mito+articles_techish+articles_tz
#         )

#         save_articles_to_db(all_articles)
#         print("✅ Scheduled scraping complete")
def scheduled_scrape():
    """Scrape every configured source, store the result, then run the AI pass.

    Runs inside its own application context so it can be invoked from the
    background scheduler.  Each scraper is called in isolation: a scraper
    that raises is reported and skipped so the remaining sources are still
    collected.  After saving, up to 50 stored articles without an AI summary
    are given a summary, tags and an AI category.
    """
    with app.app_context():

        print("📰 Scraping started...")

        # (scraper, section) pairs, listed in the order the articles are
        # saved.
        # NOTE(review): fetch_news_construction is scraped by the /scrape
        # route but not here -- confirm whether that omission is intended.
        sources = (
            # ---- TOP ----
            (fetch_news_disrupt, "top"),
            (fetch_news_voa_central, "top"),
            (fetch_news_central, "top"),
            (fetch_news_techbusiness, "top"),
            (fetch_news_techpoint, "top"),
            (fetch_news_cio, "top"),
            (fetch_news_techarena, "top"),
            (fetch_news_namibia, "top"),
            (fetch_news_village, "top"),
            # ---- SIDE ----
            (fetch_news_techlabari, "side"),
            (fetch_news_pctechmag, "side"),
            (fetch_news_afrimirror, "side"),
            (fetch_news_newtimes, "side"),
            (fetch_news_app, "side"),
            (fetch_news_mari, "side"),
            (fetch_news_techcrunch, "side"),
            (fetch_news_tz, "side"),
            # ---- BOTTOM ----
            (fetch_news_techloy, "bottom"),
            (fetch_news_coinweez, "bottom"),
            (fetch_news_datacenter, "bottom"),
            (fetch_news_techview, "bottom"),
            (fetch_news_angola, "bottom"),
            (fetch_news_techish, "bottom"),
            (fetch_news_egypt, "bottom"),
            (fetch_news_fins, "bottom"),
            (fetch_news_techbuild, "bottom"),
            # ---- SPECIAL ----
            (fetch_news_pocit, "way"),
            (fetch_news_edge, "top"),
            (fetch_news_micro, "ways"),
            (fetch_news_mito, "wayo"),
        )

        all_articles = []
        for fetch, section in sources:
            try:
                scraped = fetch()
            except Exception as e:
                # One broken site must not abort the whole run.
                print(f"❌ {fetch.__name__} failed: {e}")
                continue
            # `or []` tolerates scrapers that return None on failure.
            all_articles.extend(
                dict(article, section=section) for article in scraped or []
            )

        print(f"💾 Saving {len(all_articles)} articles...")
        save_articles_to_db(all_articles)

        # =========================
        # 🤖 AI PIPELINE
        # =========================

        print("🤖 Running AI pipeline...")

        pending = ScrapedArticle.query.filter(
            ScrapedArticle.ai_summary.is_(None)
        ).limit(50).all()

        for article in pending:
            try:
                summary = generate_ai_summary(article.title, article.description)
                tags = generate_ai_tags(article.title, article.description)
                ai_category = categorize_article(article.title, article.description)
            except Exception as e:
                print(f"❌ AI error: {e}")
                continue

            # Assign only after all three calls succeeded so a partial
            # failure leaves ai_summary empty and the article is retried on
            # the next run.
            article.ai_summary = summary
            article.ai_tags = tags
            article.ai_category = ai_category

            print(f"✅ AI processed: {(article.title or '')[:40]}")

        db.session.commit()

        print("🎉 Scheduled scraping + AI completed")

@app.route('/scrape')
def scrape_and_populate():
    """Scrape every configured source on demand and store the result.

    Each scraper is called in isolation: one that raises is reported and
    skipped, so a single broken site no longer turns the whole request into
    a server error.  Scrapers run in the same order in which their articles
    are saved.

    Returns:
        str: A fixed confirmation message.
    """
    # (scraper, section) pairs in save order.
    sources = (
        (fetch_news_disrupt, "top"),
        (fetch_news_voa_central, "top"),
        (fetch_news_central, "top"),
        (fetch_news_techbusiness, "top"),
        (fetch_news_techpoint, "top"),
        (fetch_news_cio, "top"),
        (fetch_news_techarena, "top"),
        (fetch_news_namibia, "top"),
        (fetch_news_village, "top"),
        (fetch_news_techlabari, "side"),
        (fetch_news_pctechmag, "side"),
        (fetch_news_techcrunch, "side"),
        (fetch_news_fins, "bottom"),
        (fetch_news_techloy, "bottom"),
        (fetch_news_construction, "top"),
        (fetch_news_afrimirror, "side"),
        (fetch_news_egypt, "bottom"),
        (fetch_news_datacenter, "bottom"),
        (fetch_news_newtimes, "side"),
        (fetch_news_techview, "bottom"),
        (fetch_news_angola, "bottom"),
        (fetch_news_coinweez, "bottom"),
        (fetch_news_techbuild, "bottom"),
        (fetch_news_pocit, "way"),
        (fetch_news_app, "side"),
        (fetch_news_mari, "side"),
        (fetch_news_edge, "top"),
        (fetch_news_micro, "ways"),
        (fetch_news_mito, "wayo"),
        (fetch_news_techish, "bottom"),
        (fetch_news_tz, "side"),
    )

    all_articles = []
    for fetch, section in sources:
        try:
            scraped = fetch()
        except Exception as e:
            # Report and move on; the other sources are still worth saving.
            print(f"❌ {fetch.__name__} failed: {e}")
            continue
        # `or []` tolerates scrapers that return None on failure.
        all_articles.extend(
            dict(article, section=section) for article in scraped or []
        )

    save_articles_to_db(all_articles)
    return "Data scraped and inserted into the database fast!"


@app.route('/')
@app.route('/all')
def index():
    """Render the article listing, optionally narrowed to one category.

    Reads the optional ``category`` query parameter.  With a category the
    matching articles are ordered by priority and then by publish date
    (descending); without one, all articles are ordered by publish date
    (descending) only.
    """
    category = request.args.get('category')
    newest_first = ScrapedArticle.publish_date.desc()

    if not category:
        query = ScrapedArticle.query.order_by(newest_first)
    else:
        query = ScrapedArticle.query.filter_by(category=category).order_by(
            ScrapedArticle.priority, newest_first
        )

    return render_template('index.html', articles=query.all(), category=category)
# ---------------------------------------------------------------------------
# Module-level bootstrap: everything below runs as soon as this module is
# imported, not only when the file is executed as a script.
# ---------------------------------------------------------------------------

# Background scheduler that re-runs scheduled_scrape() on a fixed interval.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=scheduled_scrape,
    trigger=IntervalTrigger(minutes=30),   # <-- every 30 minutes
    id='scrape_job',
    replace_existing=True
)

# @app.route('/run-ai')
# def run_ai():
#     threading.Thread(target=run_ai_background, daemon=True).start()
#     return "🤖 AI started"
# scheduler.start()

# with app.app_context():
#     scheduled_scrape()
scheduler.start()

print("🟢 Scheduler started at:", datetime.now())

# Run one scrape immediately so data is available before the first interval
# elapses. scheduled_scrape() also opens an app context itself.
# NOTE(review): with debug=True the reloader presumably imports this module
# a second time, which would start a second scheduler and repeat this
# initial scrape -- confirm, and consider a WERKZEUG_RUN_MAIN guard.
with app.app_context():
    print("🚀 Initial scrape triggered at:", datetime.now())
    scheduled_scrape()
# Start the Flask development server only when executed directly.
if __name__ == '__main__':
    app.run(debug=True)