<?php

namespace App\Jobs;

use DOMDocument;
use DOMXPath;
use Exception;
use Illuminate\Bus\Queueable;
use Illuminate\Console\OutputStyle;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
use Mtc\ContentManager\Contracts\PageModel;
use Mtc\ContentManager\Facades\Media;
use Mtc\ContentManager\Facades\Page;
use Mtc\ContentManager\Models\MediaFolder;
use Mtc\MercuryDataModels\SeoData;
use Mtc\MercuryDataModels\Template;

/**
 * Imports a single blog article from a remote HTML page.
 *
 * The job downloads the page at the given URL, creates or updates a page
 * record keyed by a slug derived from that URL, then stores SEO data, a
 * thumbnail and the article body (text, inline images and CTA links).
 */
class SandicliffeArticleImport implements ShouldQueue
{
    use Dispatchable;
    use InteractsWithQueue;
    use Queueable;
    use SerializesModels;

    private MediaFolder $media_folder;

    /**
     * Create a new job instance.
     *
     * The "Blog Images" media folder is looked up (or created) here, i.e. when
     * the job object is constructed rather than when it is handled.
     *
     * NOTE(review): $output is never read anywhere in this class. A console
     * output object is presumably not safe to serialize onto a real queue -
     * confirm this job is only ever dispatched synchronously.
     *
     * @param string $url URL of the article page to import
     * @param string $image_url URL of the thumbnail image; may be empty
     * @param OutputStyle $output Console output (currently unused)
     */
    public function __construct(
        private readonly string $url,
        private readonly string $image_url,
        private readonly OutputStyle $output
    ) {
        $this->media_folder = MediaFolder::query()->firstOrCreate(['name' => 'Blog Images']);
    }

    /**
     * Execute the job.
     *
     * Failures are logged rather than re-thrown, so one bad article does not
     * stop the rest of an import run.
     */
    public function handle(): void
    {
        try {
            // get page data from url
            $html = @file_get_contents($this->url);
            if ($html === false || $html === '') {
                Log::error(__CLASS__ . '::' . __FUNCTION__ . '() Failed to retrieve content for url: ' . $this->url);
                return;
            }

            $doc = new DOMDocument();

            // Real-world HTML is rarely well-formed; collect libxml errors
            // internally instead of emitting warnings. The previous setting is
            // restored afterwards because it is process-wide and would
            // otherwise leak into whatever the worker runs next.
            $previous_libxml_setting = libxml_use_internal_errors(true);
            try {
                $doc->loadHTML($html);
            } finally {
                libxml_clear_errors();
                libxml_use_internal_errors($previous_libxml_setting);
            }

            $xpath = new DOMXPath($doc);

            $page = app(PageModel::class)->updateOrCreate([
                'slug' => $this->getSlug($this->url),
            ], [
                'template_id' => $this->getTemplateId(),
                'title' => $this->getTitle($doc),
                'status' => 'published',
                'published_at' => $this->getPublishedDate($xpath),
                'category' => $this->getCategory(),
            ]);

            Page::syncContentWithTemplate($page->id);
            $this->updatePageSeo($page, $xpath);
            $this->saveThumbnail($page);
            $this->saveContent($page, $xpath, $doc);
        } catch (Exception $exception) {
            Log::error('Exception: ' . $exception->getMessage(), ['url' => $this->url]);
        }
    }

    /**
     * Fill the page's content elements from the parsed article.
     *
     * Two element/sub-element pairs receive imported content (names are
     * compared case-insensitively):
     *  - "Article image" / "Image": the main article image
     *  - "Rich text" / "Text": intro, body paragraphs, inline images and CTAs
     *
     * When the required source markup for one of those pairs is missing, that
     * sub-element is left untouched.
     *
     * NOTE(review): every other sub-element is saved with empty content, as in
     * the original implementation - confirm that is intended for re-imports.
     */
    private function saveContent(PageModel $page, DOMXPath $xpath, DOMDocument $doc): void
    {
        foreach ($page->content as $content) {
            $content_name = strtoupper($content->name);

            foreach ($content->subContent as $subContent) {
                $sub_content_name = strtoupper($subContent->name);
                $blog_content = '';

                if ($content_name === 'ARTICLE IMAGE' && $sub_content_name === 'IMAGE') {
                    $image_html = $this->buildMainImageHtml($subContent, $xpath);

                    if ($image_html === null) {
                        continue;
                    }

                    $blog_content .= $image_html;
                }

                if ($content_name === 'RICH TEXT' && $sub_content_name === 'TEXT') {
                    $text_html = $this->buildRichTextHtml($subContent, $xpath, $doc);

                    if ($text_html === null) {
                        continue;
                    }

                    $blog_content .= $text_html;
                }

                // save the content
                $subContent->content = $this->markdownToHtml($blog_content);
                $subContent->save();
            }
        }
    }

    /**
     * Import the main article image and return an <img> tag pointing at it.
     *
     * @param object $subContent Sub-content record the imported media is attached to
     * @return string|null Null when the page has no usable main image
     */
    private function buildMainImageHtml(object $subContent, DOMXPath $xpath): ?string
    {
        $main_image_div = $xpath->query("//div [contains(@class, 'post-top-img')]")->item(0);

        if (!$main_image_div instanceof \DOMElement) {
            return null;
        }

        // The wrapper div can exist without an <img> inside it; treat that the
        // same as a missing wrapper instead of calling a method on null.
        $image = $main_image_div->getElementsByTagName('img')->item(0);
        $original_image_src = $image instanceof \DOMElement ? $image->getAttribute('src') : '';

        if ($original_image_src === '') {
            return null;
        }

        $new_image = $this->importImage($original_image_src);
        Media::setUsesForModel([$new_image->id], $subContent);

        return '<img src="' . $new_image->original_url . '"/>';
    }

    /**
     * Build the article body: intro paragraph, rich-text paragraphs, inline
     * images and CTA links, in document order of their containing divs.
     *
     * @param object $subContent Sub-content record the imported media is attached to
     * @return string|null Null when the main content container is missing
     */
    private function buildRichTextHtml(object $subContent, DOMXPath $xpath, DOMDocument $doc): ?string
    {
        $class_name_content = 'experience-component experience-commerce_assets-editorialRichText';
        $class_name_cta = 'experience-component experience-commerce_assets-ctaButton';

        $content_div = $xpath->query("//div [contains(@class, 'experience-blogmain')]")->item(0);

        if (!$content_div instanceof \DOMElement) {
            return null;
        }

        $blog_content = '';

        // get the intro text (not every article has one)
        $intro = $xpath->query("//p [contains(@class, 'intro')]")->item(0);

        if ($intro !== null) {
            $blog_content .= '<p>' . $intro->textContent . '</p>';
        }

        // getElementsByTagName() returns nested divs too, so the same <img>
        // is reachable through several ancestors. Rather than doing a
        // recursive walk, keep track of imported images and only import each
        // one once. There is a risk that we ignore images that are
        // intentionally included twice in the html.
        $imported_external_image_urls = [];

        foreach ($content_div->getElementsByTagName('div') as $div_item) {
            $div_class = $div_item->getAttribute('class');

            if ($div_class === $class_name_content) {
                foreach ($div_item->getElementsByTagName('p') as $p) {
                    // import all content including HTML
                    $blog_content .= $doc->saveHTML($p);
                }
            } else {
                foreach ($xpath->query('.//img', $div_item) as $image) {
                    $original_image_src = $image->getAttribute('src');

                    // nothing to download for empty or inline (base64) sources
                    if ($original_image_src === '' || stripos($original_image_src, 'base64') !== false) {
                        continue;
                    }

                    if (isset($imported_external_image_urls[$original_image_src])) {
                        continue;
                    }

                    $imported_external_image_urls[$original_image_src] = true;
                    $new_image = $this->importImage($original_image_src);
                    Media::setUsesForModel([$new_image->id], $subContent);
                    $blog_content .= '<img src="' . $new_image->original_url . '"/>';
                }
            }

            if ($div_class === $class_name_cta) {
                foreach ($div_item->getElementsByTagName('a') as $a) {
                    // import all content including HTML
                    $blog_content .= '<p>' . $doc->saveHTML($a) . '</p>';
                }
            }
        }

        return $blog_content;
    }

    /**
     * Convert the small markdown subset found in the source articles
     * (**bold**, ##heading## .. #####heading#####, CRLF line breaks) to HTML
     * and strip inline base64 images.
     *
     * @param string|null $markdown Mixed markdown/HTML fragment
     * @return string HTML wrapped in <p> tags, or '' for null/empty input
     */
    private function markdownToHtml(?string $markdown = null): string
    {
        if ($markdown === null || $markdown === '') {
            return '';
        }

        // Order matters: longer heading markers must be replaced before
        // shorter ones, and list/paragraph handling relies on CRLF still being
        // present.
        $replacements = [
            '/\*\*(.*?)\*\*/u' => '<strong>$1</strong>',
            '/#####(.*?)#####/s' => '<h5>$1</h5>',
            '/####(.*?)####/s' => '<h4>$1</h4>',
            '/###(.*?)###/s' => '<h3>$1</h3>',
            '/##(.*?)##/s' => '<h2>$1</h2>',
            '/\r\n<li>(.*?)<\/li>\r\n/s' => '</p><ul><li>$1</li></ul><p>',
            '/\r\n\r\n/' => '</p><p>',
        ];

        foreach ($replacements as $pattern => $replacement) {
            $markdown = $this->replacePattern($pattern, $replacement, $markdown);
        }

        $markdown = str_replace("\r\n", '<br>', $markdown);
        $markdown = '<p>' . $markdown . '</p>';
        $markdown = str_replace(['<p></p>', '<p><br></p>'], '', $markdown);

        // remove base64 images
        return $this->replacePattern('/<img src="data:image(?:.*?)>/s', '', $markdown);
    }

    /**
     * preg_replace() that keeps the subject unchanged when PCRE fails.
     *
     * preg_replace() returns null on error (e.g. invalid UTF-8 with the /u
     * modifier, or hitting the backtrack limit). Propagating that null would
     * silently discard the whole article body.
     */
    private function replacePattern(string $pattern, string $replacement, string $subject): string
    {
        $result = preg_replace($pattern, $replacement, $subject);

        if ($result === null) {
            Log::warning('Regex replacement failed during blog import', [
                'pattern' => $pattern,
                'error' => preg_last_error_msg(),
            ]);

            return $subject;
        }

        return $result;
    }

    /**
     * Import the thumbnail image and attach it to the page as primary media.
     *
     * Skipped when no image URL was supplied or the page already has media
     * attached. Failures are logged and do not interrupt the import.
     */
    private function saveThumbnail(PageModel $page): void
    {
        try {
            // handle thumbnail / main image
            if (!empty($this->image_url) && $page->mediaUses()->exists() === false) {
                $thumbnail = Media::importImageFromUrl($this->image_url);
                $thumbnail->update(['folder_id' => $this->media_folder->id]);
                Media::setUsesForModel([$thumbnail->id], $page, ['primary' => true]);
            }
        } catch (Exception $exception) {
            Log::error('Exception: ' . $exception->getMessage(), ['image_url' => $this->image_url]);
        }
    }

    /**
     * Derive the page slug from the last path segment of the article URL.
     *
     * Query strings, fragments and trailing slashes are ignored so that they
     * cannot leak into (or empty) the slug. For plain ".../name.html" URLs the
     * result is the same as before.
     */
    private function getSlug(string $url): ?string
    {
        $path = parse_url($url, PHP_URL_PATH);

        // parse_url() gives null/false for URLs without a path or that it
        // cannot parse; fall back to the raw string in that case.
        if (!is_string($path) || $path === '') {
            $path = $url;
        }

        $slug = last(explode('/', rtrim($path, '/')));
        $slug = str_replace('.html', '', $slug);

        return strtolower(Str::slug($slug));
    }

    /**
     * Create or update the SEO record for the page's URL path.
     *
     * The title always mirrors the page title. The description is only written
     * when the source page has a description meta tag, so a page without one
     * neither fails the import nor wipes an existing description.
     */
    private function updatePageSeo(PageModel $page, DOMXPath $xpath): void
    {
        $seo_values = ['title' => $page->title];

        $meta = $xpath->query("//meta[@name='description']")->item(0);

        if ($meta instanceof \DOMElement) {
            $seo_values['description'] = $meta->getAttribute('content');
        }

        SeoData::query()->updateOrCreate([
            'path' => $page->urlPath()
        ], $seo_values);
    }

    /**
     * Read the publication date from the first element whose class contains
     * "post-date".
     *
     * @return string|null Date as "Y-m-d H:i:s" (time set to midnight), or null
     *                     when no date is present or it cannot be parsed
     */
    private function getPublishedDate(DOMXPath $xpath): ?string
    {
        // Full date from .post-date
        $dateNode = $xpath->query("//*[contains(@class, 'post-date')]")->item(0);
        $date_text = $dateNode ? trim($dateNode->textContent) : '';

        // We are seeing post dates like "April 07, 20257". Pick out month name,
        // day and the first four digits of the year, and ignore anything
        // trailing (presumably a stray character - the real year is assumed to
        // be the first four digits).
        if (!preg_match('/([[:alpha:]]+)\s+(\d{1,2})\s*,?\s*(\d{4})/', $date_text, $date_parts)) {
            Log::error('Error parsing date', ['date' => $date_text, 'url' => $this->url]);
            return null;
        }

        try {
            // The leading "!" resets unspecified fields, so the time is
            // midnight instead of whatever time the import happens to run.
            $date_object = Carbon::createFromFormat(
                '!F j Y',
                "{$date_parts[1]} {$date_parts[2]} {$date_parts[3]}"
            );

            return $date_object->format('Y-m-d H:i:s');
        } catch (Exception $exception) {
            Log::error('Error parsing date', ['date' => $date_text, 'url' => $this->url]);
        }

        return null;
    }

    /**
     * Look up the id of the template with slug "blog-article".
     *
     * @throws Exception When no such template exists
     */
    private function getTemplateId(): ?int
    {
        $template = Template::query()
            ->where('slug', 'blog-article')
            ->first();

        if (!$template) {
            throw new Exception('template for blog import not found');
        }

        return $template->id;
    }

    /**
     * Return the trimmed text of the document's first <title> element, or
     * "Untitled" when it is missing or empty.
     */
    private function getTitle(DOMDocument $doc): ?string
    {
        $titleNode = $doc->getElementsByTagName('title')->item(0);
        $title = $titleNode ? trim($titleNode->textContent) : null;

        if (empty($title)) {
            $title = 'Untitled';
        }

        return $title;
    }

    /**
     * Category assigned to every imported page.
     */
    private function getCategory(): string
    {
        return 'blog';
    }

    /**
     * Import an image from a URL and move it into the blog media folder.
     *
     * Spaces in the URL are percent-encoded before the download. The folder is
     * set with a direct query update, so the returned object does not reflect
     * the new folder_id.
     */
    private function importImage(string $original_image_src): \Mtc\ContentManager\Contracts\Media
    {
        $original_image_src = str_replace(' ', '%20', $original_image_src);
        $new_image = Media::importImageFromUrl($original_image_src);

        \Mtc\ContentManager\Models\Media::query()->where('id', $new_image->id)
            ->update(['folder_id' => $this->media_folder->id]);

        return $new_image;
    }
}
