import os
import re
import sys
from urllib.parse import parse_qs, urlparse

from bs4 import BeautifulSoup, NavigableString, Tag, Comment


class HTMLToBBCode:
    """Convert an HTML document or fragment into BBCode.

    The HTML is parsed with BeautifulSoup and walked recursively; every node
    is turned into its BBCode text by :meth:`_process_tag`.  Inline
    ``font-size`` / ``color`` styles on ``<p>`` and ``<span>`` become
    ``[SIZE]`` / ``[COLOR]`` tags, links to YouTube videos become
    ``[MEDIA=youtube]`` tags, and URLs listed in ``url_replacements`` are
    swapped for their replacement.
    """

    # Inline-style patterns, compiled once because they run for every
    # <p>/<span>/<div> in the document.
    _FONT_SIZE_RE = re.compile(r'font-size:\s*(\d+(?:\.\d+)?)(pt|em)')
    # The lookbehind stops "background-color:" / "border-color:" from being
    # mistaken for the text colour.
    _COLOR_RE = re.compile(r'(?<![-\w])color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)')
    _CENTER_STYLE_RE = re.compile(r'text-align:\s*center')
    # A paragraph whose whole content is one [IMG]...[/IMG] or [MEDIA]...[/MEDIA].
    _SINGLE_MEDIA_RE = re.compile(r'^\[(?:IMG|MEDIA)[^\]]*\][^\[]*\[/(?:IMG|MEDIA)\]$')

    # Upper bounds (inclusive) for BBCode sizes 1..6; anything larger is 7.
    _EM_STEPS = (0.8, 1, 1.5, 2, 2.5, 3)
    _PT_STEPS = (8, 10, 12, 14, 16, 20)

    def __init__(self):
        # Tag name -> [opening BBCode, closing BBCode] for simple wrappers.
        self.conversion_map = {
            'b': ['[b]', '[/b]'],
            'i': ['[i]', '[/i]'],
            'u': ['[u]', '[/u]'],
            'strong': ['[b]', '[/b]'],
            'em': ['[i]', '[/i]'],
            'strike': ['[s]', '[/s]'],
            'h1': ['[size=7]', '[/size]'],
            'h2': ['[size=6]', '[/size]'],
            'h3': ['[size=5]', '[/size]'],
            'h4': ['[size=4]', '[/size]'],
            'h5': ['[size=3]', '[/size]'],
            'h6': ['[size=2]', '[/size]'],
            'center': ['[center]', '[/center]'],
            'div': ['', ''],  # Handle div attributes separately
        }

        # URL replacements (from -> to), applied to link hrefs and image srcs
        self.url_replacements = {
            'https://polymart.org/resource/vehiclesplus-1-12-1-20-2.633?purchase=1':
                'https://www.spigotmc.org/resources/vehiclesplus-1-12-1-20-2.70523/purchase',
            'https://polymart.org/resource?spigot_id=1997':
                'https://www.spigotmc.org/resources/protocollib.1997/',
            'https://polymart.org/resource?spigot_id=34315':
                'https://www.spigotmc.org/resources/vault.34315/',
            'https://polymart.org/resource?spigot_id=9089':
                'https://www.spigotmc.org/resources/essentialsx.9089/',
            'https://sbdevelopment.tech/images/buttons/buy2.png':
                'https://sbdevelopment.tech/images/buttons/buy.png',
            'https://sbdevelopment.tech/images/buttons/wiki2.png':
                'https://sbdevelopment.tech/images/buttons/wiki.png',
            'https://sbdevelopment.tech/images/buttons/discord2.png':
                'https://sbdevelopment.tech/images/buttons/discord.png',
            'https://sbdevelopment.tech/images/buttons/website2.png':
                'https://sbdevelopment.tech/images/buttons/website.png'
        }

    def _convert_font_size(self, size, unit):
        """Convert a CSS font size to the BBCode size scale 1-7.

        Args:
            size: The numeric part of the CSS value (string or number);
                decimals such as ``'10.5'`` are accepted for both units.
            unit: ``'em'`` or ``'pt'``.

        Returns:
            int: The BBCode size, or 3 (normal) for any other unit.
        """
        if unit == 'em':
            steps = self._EM_STEPS
        elif unit == 'pt':
            steps = self._PT_STEPS
        else:
            return 3  # Default to normal size

        # float() rather than int(): the style regex allows "10.5pt".
        value = float(size)
        for level, upper_bound in enumerate(steps, start=1):
            if value <= upper_bound:
                return level
        return 7

    @staticmethod
    def _convert_youtube_url(url):
        """Return a ``[MEDIA=youtube]`` tag for a YouTube video URL.

        Recognises ``youtube.com/watch?v=ID``, ``youtube.com/embed/ID``,
        ``/shorts/ID``, ``/live/ID``, ``/v/ID`` and ``youtu.be/ID``.

        Returns:
            str | None: The BBCode tag, or ``None`` when *url* is not a
            YouTube URL or carries no video id (channel and playlist pages,
            for example), so the caller can render it as an ordinary link.
        """
        candidate = url.strip()
        # urlparse only recognises a host after "//", so give scheme-less
        # URLs such as "www.youtube.com/watch?v=..." a network-path prefix.
        if '://' not in candidate and not candidate.startswith('//'):
            candidate = '//' + candidate
        try:
            parts = urlparse(candidate)
            host = (parts.hostname or '').lower()
        except ValueError:
            # Malformed authority component (e.g. unbalanced "[").
            return None

        segments = [segment for segment in parts.path.split('/') if segment]
        video_id = ''
        if host == 'youtu.be':
            if segments:
                video_id = segments[0]
        elif host == 'youtube.com' or host.endswith('.youtube.com'):
            query_ids = parse_qs(parts.query).get('v')
            if query_ids:
                video_id = query_ids[0]
            elif len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
                video_id = segments[1]

        # Only emit ids made of URL-safe characters so nothing odd can end up
        # inside the BBCode tag.
        if video_id and re.fullmatch(r'[A-Za-z0-9_-]+', video_id):
            return f'[MEDIA=youtube]{video_id}[/MEDIA]'
        return None

    def convert(self, html_content):
        """Convert *html_content* (an HTML string) to BBCode.

        Script/style elements and HTML comments are dropped before the
        conversion; the result is tidied so ``[CENTER]`` blocks sit on their
        own lines and runs of blank lines are collapsed.

        Returns:
            str: The BBCode text with surrounding whitespace stripped.
        """
        # Create BeautifulSoup object
        soup = BeautifulSoup(html_content, 'html.parser')

        # Remove script and style elements
        for element in soup(['script', 'style']):
            element.decompose()

        # Remove HTML comments
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
            comment.extract()

        # Convert the HTML to BBCode
        bbcode = self._process_tag(soup)

        # Fix [CENTER] tag formatting - ensure single newline before [CENTER]
        bbcode = re.sub(r'\n+\[CENTER\]', '\n[CENTER]', bbcode)
        bbcode = re.sub(r'([^\n])\[CENTER\]', r'\1\n[CENTER]', bbcode)

        # Ensure [/CENTER] is followed by a newline
        bbcode = re.sub(r'\[/CENTER\]([^\n])', r'[/CENTER]\n\1', bbcode)

        # Collapse three or more newlines into a single blank line
        bbcode = re.sub(r'\n{3,}', '\n\n', bbcode)

        # Remove any leading/trailing whitespace
        return bbcode.strip()

    def _wrap_with_style(self, content, style):
        """Wrap *content* in ``[SIZE]`` / ``[COLOR]`` tags taken from inline CSS.

        Args:
            content: Already-converted BBCode of the element's children.
            style: The element's ``style`` attribute (may be empty).

        Returns:
            str: *content* wrapped as ``[SIZE=n][COLOR=#rrggbb]...[/COLOR][/SIZE]``
            (each pair only when the matching declaration is present), or
            *content* unchanged when neither declaration is found.
        """
        wrappers = []

        size_match = self._FONT_SIZE_RE.search(style)
        if size_match:
            level = self._convert_font_size(size_match.group(1), size_match.group(2))
            wrappers.append((f'[SIZE={level}]', '[/SIZE]'))

        color_match = self._COLOR_RE.search(style)
        if color_match:
            red, green, blue = (int(channel) for channel in color_match.groups())
            wrappers.append((f'[COLOR=#{red:02x}{green:02x}{blue:02x}]', '[/COLOR]'))

        # Wrap from the innermost pair outwards so the tags nest properly.
        for opening, closing in reversed(wrappers):
            content = f'{opening}{content}{closing}'
        return content

    @staticmethod
    def _normalize_text(node):
        """Collapse whitespace in a text node, keeping one trailing space."""
        text = str(node)
        # A trailing space separates this text from a following inline tag.
        keep_trailing_space = text.endswith(' ')
        text = ' '.join(text.split())
        if keep_trailing_space:
            text += ' '
        return text

    def _convert_paragraph(self, element):
        """Convert a ``<p>``: styled content followed by paragraph spacing."""
        content = self._wrap_with_style(
            self._get_inner_content(element), element.get('style', ''))

        stripped = content.strip()
        # No spacing for empty paragraphs or inside special containers.
        if not stripped or element.find_parent(['blockquote', 'center']):
            return content
        # A paragraph holding just one image/media tag gets a single newline.
        if self._SINGLE_MEDIA_RE.match(stripped):
            return f'{content}\n'
        return f'{content}\n\n'

    def _convert_link(self, element):
        """Convert an ``<a>``; anchors without an ``href`` produce nothing."""
        href = element.get('href', '')
        if not href:
            return ''

        # YouTube video links become embedded media instead of a URL tag.
        youtube_bbcode = self._convert_youtube_url(href)
        if youtube_bbcode:
            return youtube_bbcode

        href = self.url_replacements.get(href, href)
        label = self._get_inner_content(element)
        # Text normalisation drops the leading space of the following text
        # node, so re-add it here when the source had one.
        following = element.next_sibling
        followed_by_space = (isinstance(following, NavigableString)
                             and following.startswith(' '))
        # Use single quotes for URLs and preserve space
        return f"[URL='{href}']{label}[/URL]" + (' ' if followed_by_space else '')

    def _convert_list(self, element):
        """Convert ``<ul>`` / ``<ol>`` using only their direct ``<li>`` children."""
        opener = '[LIST]' if element.name == 'ul' else '[LIST=1]'
        items = ''.join(
            '[*]' + self._get_inner_content(item).strip() + '\n'
            for item in element.find_all('li', recursive=False)
        )
        return f'{opener}\n{items}[/LIST]\n'

    def _convert_pre(self, element):
        """Convert ``<pre><code>`` blocks; other ``<pre>`` content is dropped."""
        code = element.find('code')
        if code is None:
            return ''
        # Join the raw strings so the original formatting is preserved.
        code_content = ''.join(code.strings)
        if not code_content:
            return ''
        if 'language-markup' in element.get('class', []):
            return f'[html]\n{code_content}\n[/html]'
        return f'[code]\n{code_content}\n[/code]'

    def _process_tag(self, element):
        """Return the BBCode for *element* (a text node or a tag), recursively."""
        if isinstance(element, NavigableString):
            return self._normalize_text(element)

        # Skip elements with hidden-bbcode class
        if isinstance(element, Tag) and 'hidden-bbcode' in element.get('class', []):
            return ''

        name = element.name

        if name == 'br':
            return '\n'

        if name == 'p':
            return self._convert_paragraph(element)

        if name == 'span':
            return self._wrap_with_style(
                self._get_inner_content(element), element.get('style', ''))

        # Centered divs; any other div falls through to the generic handling.
        if name == 'div' and self._CENTER_STYLE_RE.search(element.get('style', '')):
            inner_content = self._get_inner_content(element).strip()
            if inner_content:
                # Both opening and closing tags on their own lines
                return f'[CENTER]\n{inner_content}\n[/CENTER]'
            return ''

        # Blockquotes keep only their text, one stripped string per line.
        if name == 'blockquote':
            quoted = '\n'.join(element.stripped_strings)
            return f'[QUOTE]\n{quoted.strip()}\n[/QUOTE]'

        if name == 'a':
            return self._convert_link(element)

        if name == 'img':
            src = element.get('src', '')
            if src:
                src = self.url_replacements.get(src, src)
                return f'[IMG]{src}[/IMG]'
            return ''

        if name in ('ul', 'ol'):
            return self._convert_list(element)

        if name == 'pre':
            return self._convert_pre(element)

        # Simple wrappers; empty elements fall through and emit no tags.
        if name in self.conversion_map:
            content = self._get_inner_content(element)
            if content.strip():
                opening, closing = self.conversion_map[name]
                return f"{opening}{content}{closing}"

        # Any other tag contributes just the content of its children.
        return self._get_inner_content(element)

    def _get_inner_content(self, element):
        """Return the concatenated BBCode of all direct children of *element*."""
        return ''.join(self._process_tag(child) for child in element.children)


def convert_html_to_bbcode(html_content):
    """
    Translate an HTML string into BBCode using a fresh HTMLToBBCode converter.

    Args:
        html_content (str): The HTML markup to translate

    Returns:
        str: The BBCode produced by HTMLToBBCode.convert
    """
    return HTMLToBBCode().convert(html_content)


def convert_file(input_file):
    """
    Read an HTML file, convert it to BBCode and write the result next to it.

    The output path is the input path with its trailing ``.html`` replaced by
    ``.bbcode``. Progress and errors are reported with ``print``.

    Args:
        input_file (str): Path to the HTML file; must end in ``.html``

    Returns:
        bool: True when the output file was written, False when the
        extension is wrong or any exception occurred during conversion
    """
    html_suffix = '.html'
    if not input_file.endswith(html_suffix):
        print(f"Error: Input file '{input_file}' must have .html extension")
        return False

    # Swap the .html suffix for .bbcode
    output_file = input_file[:-len(html_suffix)] + '.bbcode'

    try:
        with open(input_file, 'r', encoding='utf-8') as source:
            markup = source.read()

        # Convert before opening the output so a failed conversion
        # never creates or truncates the target file.
        converted = convert_html_to_bbcode(markup)

        with open(output_file, 'w', encoding='utf-8') as target:
            target.write(converted)

        print(f"Successfully converted '{input_file}' to '{output_file}'")
        return True

    except Exception as e:
        print(f"Error converting file: {str(e)}")
        return False


if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the HTML file.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python html_to_bbcode.py <input_file.html>")
        sys.exit(1)

    input_file = cli_args[0]
    if not os.path.exists(input_file):
        print(f"Error: File '{input_file}' does not exist")
        sys.exit(1)

    # Exit status 0 on success, 1 on any conversion failure.
    if convert_file(input_file):
        sys.exit(0)
    sys.exit(1)