"""Convert HTML documents to BBCode.

Usage as a script::

    python html_to_bbcode.py <input_file.html>

The converted text is written next to the input file with a ``.bbcode``
extension.
"""

import os
import re
import sys
from typing import Optional

from bs4 import BeautifulSoup, NavigableString, Tag, Comment

# CSS ``font-size`` declaration in pt or em, e.g. "font-size: 10.5pt".
_FONT_SIZE_RE = re.compile(r'font-size:\s*(\d+(?:\.\d+)?)(pt|em)')

# CSS ``color: rgb(r, g, b)`` declaration.  The look-behind keeps
# ``background-color`` (and similar ``*-color`` properties) from being
# mistaken for the text colour.
_COLOR_RE = re.compile(r'(?<![\w-])color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)')

# ``text-align: center`` with any amount of whitespace after the colon.
_TEXT_ALIGN_CENTER_RE = re.compile(r'text-align:\s*center')

# A paragraph whose whole content is a single [IMG] or [MEDIA] element.
_SINGLE_EMBED_RE = re.compile(
    r'^\[(?:IMG|MEDIA)[^\]]*\][^\[]*\[/(?:IMG|MEDIA)\]$'
)

# Patterns that capture the video id from the supported YouTube URL shapes.
_YOUTUBE_ID_PATTERNS = (
    re.compile(r'youtube\.com/[^#]*?[?&]v=([\w-]+)'),
    re.compile(r'youtube\.com/(?:embed|shorts)/([\w-]+)'),
    re.compile(r'youtu\.be/([\w-]+)'),
)


class HTMLToBBCode:
    """Walk a BeautifulSoup tree and render it as BBCode text."""

    # (inclusive upper bound, BBCode size) pairs per CSS unit; anything above
    # the last bound maps to size 7.
    # em values: 1=normal, 2=large, 3=larger
    _SIZE_THRESHOLDS = {
        'em': ((0.8, 1), (1, 2), (1.5, 3), (2, 4), (2.5, 5), (3, 6)),
        'pt': ((8, 1), (10, 2), (12, 3), (14, 4), (16, 5), (20, 6)),
    }

    def __init__(self):
        # Tag name -> [opening BBCode, closing BBCode].
        self.conversion_map = {
            'b': ['[b]', '[/b]'],
            'i': ['[i]', '[/i]'],
            'u': ['[u]', '[/u]'],
            'strong': ['[b]', '[/b]'],
            'em': ['[i]', '[/i]'],
            'strike': ['[s]', '[/s]'],
            'h1': ['[size=7]', '[/size]'],
            'h2': ['[size=6]', '[/size]'],
            'h3': ['[size=5]', '[/size]'],
            'h4': ['[size=4]', '[/size]'],
            'h5': ['[size=3]', '[/size]'],
            'h6': ['[size=2]', '[/size]'],
            'center': ['[center]', '[/center]'],
            'div': ['', ''],  # Handle div attributes separately
        }

        # URL replacements (from -> to), applied to link targets and image
        # sources on an exact-match basis.
        self.url_replacements = {
            'https://polymart.org/resource/vehiclesplus-1-12-1-20-2.633?purchase=1': 'https://www.spigotmc.org/resources/vehiclesplus-1-12-1-20-2.70523/purchase',
            'https://polymart.org/resource?spigot_id=1997': 'https://www.spigotmc.org/resources/protocollib.1997/',
            'https://polymart.org/resource?spigot_id=34315': 'https://www.spigotmc.org/resources/vault.34315/',
            'https://polymart.org/resource?spigot_id=9089': 'https://www.spigotmc.org/resources/essentialsx.9089/',
            'https://sbdevelopment.tech/images/buttons/buy2.png': 'https://sbdevelopment.tech/images/buttons/buy.png',
            'https://sbdevelopment.tech/images/buttons/wiki2.png': 'https://sbdevelopment.tech/images/buttons/wiki.png',
            'https://sbdevelopment.tech/images/buttons/discord2.png': 'https://sbdevelopment.tech/images/buttons/discord.png',
            'https://sbdevelopment.tech/images/buttons/website2.png': 'https://sbdevelopment.tech/images/buttons/website.png',
        }

    def _convert_font_size(self, size, unit) -> int:
        """Convert a CSS font size to the BBCode size scale 1-7.

        Args:
            size: Numeric size as a string or number; decimals are accepted
                for both units (e.g. ``"10.5"``).
            unit: ``'em'`` or ``'pt'``.

        Returns:
            The BBCode size, or 3 (normal) when the unit is not recognised.
        """
        thresholds = self._SIZE_THRESHOLDS.get(unit)
        if thresholds is None:
            return 3  # Default to normal size

        # float() rather than int(): the style regex admits values such as
        # "10.5pt", which int() would reject with a ValueError.
        value = float(size)
        for upper_bound, bbcode_size in thresholds:
            if value <= upper_bound:
                return bbcode_size
        return 7

    @staticmethod
    def _convert_youtube_url(url) -> Optional[str]:
        """Return a ``[MEDIA=youtube]`` tag for a YouTube video URL.

        Returns ``None`` when no video id can be found, so that non-video
        YouTube links (channels, playlists, ...) are rendered as ordinary
        ``[URL]`` links instead of broken embeds.
        """
        for pattern in _YOUTUBE_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return f'[MEDIA=youtube]{match.group(1)}[/MEDIA]'
        return None

    def convert(self, html_content) -> str:
        """Convert an HTML string to BBCode.

        Args:
            html_content: The HTML markup to convert.

        Returns:
            The BBCode text with surrounding whitespace stripped.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        # Remove script and style elements
        for element in soup(['script', 'style']):
            element.decompose()

        # Remove HTML comments
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
            comment.extract()

        bbcode = self._process_tag(soup)

        # Fix [CENTER] tag formatting - ensure single newline before [CENTER]
        bbcode = re.sub(r'\n+\[CENTER\]', '\n[CENTER]', bbcode)
        bbcode = re.sub(r'([^\n])\[CENTER\]', r'\1\n[CENTER]', bbcode)

        # Ensure [/CENTER] is on its own line
        bbcode = re.sub(r'\[/CENTER\]([^\n])', r'[/CENTER]\n\1', bbcode)

        # Collapse runs of three or more newlines into one blank line
        bbcode = re.sub(r'\n{3,}', '\n\n', bbcode)

        return bbcode.strip()

    def _process_tag(self, element) -> str:
        """Render one node (text or tag) and everything below it as BBCode."""
        if isinstance(element, NavigableString):
            return self._convert_text(element)

        # Skip elements with hidden-bbcode class
        if isinstance(element, Tag) and 'hidden-bbcode' in element.get('class', []):
            return ''

        name = element.name

        if name == 'br':
            return '\n'

        # NOTE(review): a special case for paragraphs containing an
        # 'externalLink ProxyLink' anchor used to sit *after* this handler,
        # where it could never run because every <p> returns here.  It was
        # removed so the output stays what it has always been; reinstate it
        # above this check if that orange warning styling is actually wanted.
        if name == 'p':
            return self._convert_paragraph(element)

        if name == 'span':
            return self._apply_inline_style(
                self._get_inner_content(element), element.get('style', '')
            )

        # Only centred divs get special treatment; every other div falls
        # through to the conversion map below.
        if name == 'div' and _TEXT_ALIGN_CENTER_RE.search(element.get('style', '')):
            return self._convert_centered_div(element)

        if name == 'blockquote':
            return self._convert_blockquote(element)

        if name == 'a':
            return self._convert_link(element)

        if name == 'img':
            return self._convert_image(element)

        if name in ('ul', 'ol'):
            return self._convert_list(element)

        if name == 'pre':
            return self._convert_preformatted(element)

        # Basic formatting tags; anything unknown just yields its content.
        content = self._get_inner_content(element)
        if name in self.conversion_map and content.strip():
            opening, closing = self.conversion_map[name]
            return f'{opening}{content}{closing}'
        return content

    @staticmethod
    def _convert_text(element) -> str:
        """Normalise whitespace in a text node, keeping one trailing space."""
        text = str(element)
        has_trailing_space = text.endswith(' ')
        # Collapse internal whitespace runs (this also drops leading space).
        text = ' '.join(text.split())
        if has_trailing_space:
            text += ' '
        return text

    def _apply_inline_style(self, content, style) -> str:
        """Wrap content in [SIZE]/[COLOR] tags derived from a CSS style string.

        The tags are properly nested: ``[SIZE][COLOR]text[/COLOR][/SIZE]``.
        Content is returned unchanged when the style has neither a pt/em
        font size nor an ``rgb()`` text colour.
        """
        wrappers = []

        size_match = _FONT_SIZE_RE.search(style)
        if size_match:
            bbcode_size = self._convert_font_size(
                size_match.group(1), size_match.group(2)
            )
            wrappers.append((f'[SIZE={bbcode_size}]', '[/SIZE]'))

        color_match = _COLOR_RE.search(style)
        if color_match:
            # Clamp each channel so out-of-range values still give a valid
            # six-digit hex colour.
            red, green, blue = (min(int(c), 255) for c in color_match.groups())
            wrappers.append(
                (f'[COLOR=#{red:02x}{green:02x}{blue:02x}]', '[/COLOR]')
            )

        # Wrap from the innermost tag outwards so the first entry ends up
        # outermost and every tag closes in the reverse order it opened.
        for opening, closing in reversed(wrappers):
            content = f'{opening}{content}{closing}'
        return content

    def _convert_paragraph(self, element) -> str:
        """Render a <p>, appending paragraph spacing where appropriate."""
        content = self._apply_inline_style(
            self._get_inner_content(element), element.get('style', '')
        )

        # No spacing for empty paragraphs or inside special containers.
        if not content.strip() or element.find_parent(['blockquote', 'center']):
            return content

        # A lone image/media element only needs a single line break.
        if _SINGLE_EMBED_RE.match(content.strip()):
            return f'{content}\n'
        return f'{content}\n\n'

    def _convert_centered_div(self, element) -> str:
        """Render a centred <div>; empty ones produce no output."""
        inner_content = self._get_inner_content(element).strip()
        if not inner_content:
            return ''
        # Both opening and closing tags on their own lines
        return f'[CENTER]\n{inner_content}\n[/CENTER]'

    @staticmethod
    def _convert_blockquote(element) -> str:
        """Render a <blockquote> as plain text lines inside [QUOTE]."""
        # All inner formatting is intentionally discarded.
        content = '\n'.join(element.stripped_strings)
        return f'[QUOTE]\n{content}\n[/QUOTE]'

    def _convert_link(self, element) -> str:
        """Render an <a>; anchors without an href produce no output."""
        href = element.get('href', '')
        if not href:
            return ''

        # YouTube video links become embedded media.
        youtube_bbcode = self._convert_youtube_url(href)
        if youtube_bbcode:
            return youtube_bbcode

        href = self.url_replacements.get(href, href)
        content = self._get_inner_content(element)

        # Text nodes lose their leading whitespace during normalisation, so
        # re-add the space that separated the link from the following text.
        next_sibling = element.next_sibling
        has_space = (
            isinstance(next_sibling, NavigableString)
            and next_sibling.startswith(' ')
        )

        # Use single quotes for URLs and preserve space
        return f"[URL='{href}']{content}[/URL]" + (' ' if has_space else '')

    def _convert_image(self, element) -> str:
        """Render an <img>; images without a src produce no output."""
        src = element.get('src', '')
        if not src:
            return ''
        src = self.url_replacements.get(src, src)
        return f'[IMG]{src}[/IMG]'

    def _convert_list(self, element) -> str:
        """Render a <ul>/<ol> using only its direct <li> children."""
        opening = '[LIST]' if element.name == 'ul' else '[LIST=1]'
        items = ''.join(
            f'[*]{self._get_inner_content(item).strip()}\n'
            for item in element.find_all('li', recursive=False)
        )
        return f'{opening}\n{items}[/LIST]\n'

    @staticmethod
    def _convert_preformatted(element) -> str:
        """Render <pre><code> as [code] (or [html] for language-markup)."""
        code = element.find('code')
        if code is None:
            return ''

        # Concatenate the raw strings so the original formatting survives.
        code_content = ''.join(code.strings)
        if not code_content:
            return ''

        if 'language-markup' in element.get('class', []):
            return f'[html]\n{code_content}\n[/html]'
        return f'[code]\n{code_content}\n[/code]'

    def _get_inner_content(self, element) -> str:
        """Render all children of *element* and concatenate the results."""
        return ''.join(self._process_tag(child) for child in element.children)


def convert_html_to_bbcode(html_content):
    """
    Convert HTML content to BBCode.

    Args:
        html_content (str): The HTML content to convert

    Returns:
        str: The converted BBCode content
    """
    converter = HTMLToBBCode()
    return converter.convert(html_content)


def convert_file(input_file):
    """
    Convert an HTML file to BBCode and save it with .bbcode extension

    Args:
        input_file (str): Path to the HTML file

    Returns:
        bool: True on success, False if the extension is wrong or the
        conversion failed (the reason is printed).
    """
    if not input_file.endswith('.html'):
        print(f"Error: Input file '{input_file}' must have .html extension")
        return False

    output_file = input_file[:-5] + '.bbcode'  # Replace .html with .bbcode

    # Broad catch is deliberate: this is the command-line boundary, and any
    # failure (I/O, decoding, parsing) is reported rather than raised.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            html_content = f.read()

        bbcode = convert_html_to_bbcode(html_content)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(bbcode)

        print(f"Successfully converted '{input_file}' to '{output_file}'")
        return True
    except Exception as e:
        print(f"Error converting file: {str(e)}")
        return False


def main(argv=None):
    """Run the command-line interface and return the process exit code.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if len(args) != 1:
        print("Usage: python html_to_bbcode.py <input_file.html>")
        return 1

    input_file = args[0]
    if not os.path.exists(input_file):
        print(f"Error: File '{input_file}' does not exist")
        return 1

    return 0 if convert_file(input_file) else 1


if __name__ == "__main__":
    sys.exit(main())