# Source code for converters.md2html_converter

from __future__ import print_function, unicode_literals
import os
import string
import markdown
import markdown2
import codecs
from shutil import copyfile
from bs4 import BeautifulSoup
from libraries.general_tools.file_utils import write_file, get_files
from converter import Converter


class Md2HtmlConverter(Converter):
    """Converter that renders Markdown files to templated HTML pages.

    Every ``.md`` file returned by ``get_files`` for ``self.files_dir`` is
    rendered to HTML, wrapped in ``templates/template.html`` (located next to
    this module) and written to ``self.output_dir``.  Every other file is
    copied to ``self.output_dir`` unchanged, unless a file with the same name
    already exists there.
    """

    # Heading elements whose leading ``<a id="...">`` anchor gets folded into
    # the heading tag itself by ``_promote_header_anchors``.
    _HEADER_TAGS = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')

    def convert(self):
        """Run the conversion that matches ``self.resource``.

        ``"obs"`` resources go through :meth:`convert_obs`; everything else
        goes through :meth:`convert_markdown`.

        :return: always ``True``.
        """
        if self.resource == "obs":
            self.convert_obs()
        else:
            self.convert_markdown()
        return True

    def convert_obs(self):
        """Convert OBS Markdown files to HTML and copy all other files over."""
        self.log.info('Processing OBS markdown files')

        files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
        html_template = self._load_html_template()

        for filename in files:
            if not filename.endswith('.md'):
                # Directly copy over files that are not markdown files
                self._copy_non_markdown_file(filename)
                continue

            # 'utf-8-sig' strips a leading BOM if the file has one.
            with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                md = md_file.read()
            html = markdown.markdown(md)
            # NOTE(review): the page title comes from ``self.source`` here but
            # from ``self.resource`` in convert_markdown() -- confirm that the
            # difference is intentional.
            html = html_template.safe_substitute(title=self.source.upper(), content=html)
            self._write_html_file(filename, html)

        self.log.info('Finished processing OBS Markdown files.')

    def convert_markdown(self):
        """Convert generic Markdown files to HTML and copy all other files over.

        When ``self.check_for_exclusive_convert()`` returns a non-empty
        collection, only Markdown files whose base name is in it are
        converted; the remaining Markdown files are skipped (not copied).
        ``ta`` resources are rendered with ``markdown2`` (with the
        ``markdown-in-html`` and ``tables`` extras), all others with
        ``markdown``.
        """
        self.log.info('Processing Markdown files')

        files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
        # Defined outside this class body (presumably on Converter).
        convert_only_list = self.check_for_exclusive_convert()
        html_template = self._load_html_template()

        for filename in files:
            if not filename.endswith('.md'):
                # Directly copy over files that are not markdown files
                self._copy_non_markdown_file(filename)
                continue

            # See if this is a file we are to convert.
            if convert_only_list and os.path.basename(filename) not in convert_only_list:
                continue

            # 'utf-8-sig' strips a leading BOM if the file has one.
            with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                md = md_file.read()
            if self.resource == 'ta':
                html = markdown2.markdown(md, extras=['markdown-in-html', 'tables'])
            else:
                html = markdown.markdown(md)
            html = html_template.safe_substitute(title=self.resource.upper(), content=html)
            html = self._promote_header_anchors(html)
            self._write_html_file(filename, html)

        self.log.info('Finished processing Markdown files.')

    @staticmethod
    def _load_html_template():
        """Return ``templates/template.html`` as a ``string.Template``.

        The file is decoded as UTF-8 so that substituting unicode content
        into it never mixes byte strings with text.
        """
        current_dir = os.path.dirname(os.path.realpath(__file__))
        template_path = os.path.join(current_dir, 'templates', 'template.html')
        with codecs.open(template_path, 'r', 'utf-8') as template_file:
            return string.Template(template_file.read())

    def _promote_header_anchors(self, html):
        """Fold ``<a id=...>`` anchors into their parent heading tag.

        Changes headers like ``<h1><a id="verbs"/>Verbs</h1>`` to
        ``<h1 id="verbs" class="section-header">Verbs</h1>``.

        :param html: the HTML document as text.
        :return: the rewritten HTML document as text.
        """
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.find_all('a', {'id': True}):
            parent = tag.parent
            if parent and parent.name in self._HEADER_TAGS:
                parent['id'] = tag['id']
                parent['class'] = parent.get('class', []) + ['section-header']
                tag.extract()
        # decode() is what unicode(soup) calls; unlike the ``unicode``
        # builtin it also exists on Python 3.
        return soup.decode()

    def _write_html_file(self, md_filename, html):
        """Write ``html`` to ``self.output_dir`` as ``<md base name>.html``."""
        base_name = os.path.splitext(os.path.basename(md_filename))[0]
        html_filename = base_name + ".html"
        write_file(os.path.join(self.output_dir, html_filename), html)
        self.log.info('Converted {0} to {1}.'.format(os.path.basename(md_filename), html_filename))

    def _copy_non_markdown_file(self, filename):
        """Copy ``filename`` into ``self.output_dir`` unless it already exists there.

        The copy is best-effort: a failure is logged and conversion carries on.
        """
        try:
            output_file = os.path.join(self.output_dir, os.path.basename(filename))
            if not os.path.exists(output_file):
                copyfile(filename, output_file)
        except Exception as e:
            # Deliberately broad so one bad file cannot abort the whole
            # conversion, but no longer silent and no longer swallowing
            # KeyboardInterrupt/SystemExit as the bare ``except`` did.
            self.log.warning('Could not copy {0} to the output directory: {1!r}'.format(
                os.path.basename(filename), e))