<?php

namespace App\Imports\Blog;

use App\Jobs\SandicliffeArticleImport;
use DOMDocument;
use DOMXPath;
use Illuminate\Console\OutputStyle;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Illuminate\Support\Facades\Log;

/**
 * Scrapes the Sandicliffe blog landing page and dispatches one
 * SandicliffeArticleImport job per article tile found on it.
 */
class SandicliffeBlogImport
{
    use DispatchesJobs;

    private const ARTICLE_LISTING_URL = 'https://www.sandicliffe.co.uk/blog-landing';
    private const ARTICLE_BASE_URL = 'https://www.sandicliffe.co.uk';

    /**
     * @param OutputStyle $output Console output used for progress messages; it is also
     *                            handed to every dispatched article import job.
     */
    public function __construct(
        private readonly OutputStyle $output
    ) {
        //
    }

    /**
     * Fetch the article listing and dispatch an import job for each entry.
     *
     * When the listing cannot be fetched or parsed, zero entries are reported
     * and no jobs are dispatched.
     */
    public function handle(): void
    {
        $this->output->info('Import started!');
        collect($this->getArticlesToImport())
            ->tap(fn($items) => $this->output->info($items->count() . ' entries to import'))
            ->each(function ($article) {
                $this->dispatch(new SandicliffeArticleImport($article['url'], $article['image_url'], $this->output));
            });
    }

    /**
     * Download the listing page and extract the article links and their tile images.
     *
     * @return list<array{url: string, image_url: string}> Empty when the page could not
     *                                                     be fetched or parsed.
     */
    private function getArticlesToImport(): array
    {
        // Warning suppression is deliberate: a failed fetch is reported through the
        // log entry below rather than through a PHP warning.
        $html = @file_get_contents(self::ARTICLE_LISTING_URL);
        if ($html === false || $html === '') {
            Log::error('Failed to fetch articles from ' . self::ARTICLE_LISTING_URL);

            // DOMDocument::loadHTML() rejects an empty source, so stop here.
            return [];
        }

        $doc = new DOMDocument();

        // Real-world markup is rarely valid; collect libxml parse errors silently and
        // restore the caller's error-handling mode once parsing is done.
        $previousErrorMode = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        if ($loaded === false) {
            Log::error('Failed to parse articles from ' . self::ARTICLE_LISTING_URL);

            return [];
        }

        $xpath = new DOMXPath($doc);
        $nodes = $xpath->query("//a[contains(@class, 'blog-tile-image')]");

        $articles = [];

        foreach ($nodes as $node) {
            $href = trim($node->getAttribute('href'));
            if ($href === '') {
                // A tile without a link target has nothing to import.
                continue;
            }

            // Tiles without an <img> yield an empty image URL instead of a fatal error.
            $image = $node->getElementsByTagName('img')->item(0)?->getAttribute('src') ?? '';
            $image = str_replace(' ', '%20', $image);

            // Site-relative links get the base URL prepended; absolute links are kept as-is.
            $url = preg_match('#^https?://#i', $href) === 1
                ? $href
                : self::ARTICLE_BASE_URL . '/' . ltrim($href, '/');

            $articles[] = [
                'url' => $url,
                'image_url' => $image,
            ];
        }

        return $articles;
    }
}
