Mercurial > md2html

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Cargo.toml	Thu Feb 16 15:22:52 2023 +0100
@@ -0,0 +1,9 @@
+[package]
+name = "maker"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+base64 = "0.21.0"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/blockquote/mod.rs	Thu Feb 16 15:22:52 2023 +0100
@@ -0,0 +1,25 @@
+
+// Blockquote handling.
+//
+// Strips every leading '>' from `ip` and treats the number of stripped markers as the
+// nesting depth of the current line. When that depth differs from the depth stored in
+// `blockquote`, one "<blockquote>" or "</blockquote>" entry is pushed onto `html_lines`
+// per level of difference, and `blockquote` is set to the new depth.
+//
+// A line made up only of '>' characters is accepted and leaves `ip` empty.
+pub fn deal_with_blockquote(html_lines: &mut Vec<String>, blockquote: &mut i8, ip: &mut String){
+    // '>' is a single byte in UTF-8, so the marker count doubles as a byte offset.
+    let marker_count = ip.chars().take_while(|&c| c == '>').count();
+    ip.drain(..marker_count);
+
+    // Depths beyond i8::MAX are clamped instead of overflowing the counter.
+    let current_blockq = i8::try_from(marker_count).unwrap_or(i8::MAX);
+
+    if current_blockq < *blockquote {
+        // the line has fewer blockquote levels, close the surplus ones
+        for _ in current_blockq..*blockquote {
+            html_lines.push("</blockquote>".to_string());
+        }
+    } else {
+        // the line has more blockquote levels (or the same amount), open the missing ones
+        for _ in *blockquote..current_blockq {
+            html_lines.push("<blockquote>".to_string());
+        }
+    }
+    *blockquote = current_blockq;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/header/mod.rs	Thu Feb 16 15:22:52 2023 +0100
@@ -0,0 +1,47 @@
+use crate::interpret_line;
+
+// Handles underlined ("setext" style) headers. `ip` is the current line and `ht` the
+// header level to test for: level 1 is underlined with '=' characters, level 2 with '-'.
+// When `ip` consists solely of the underline character belonging to `ht`, the last entry
+// of `html_lines` is replaced by that entry formatted as a header and true is returned.
+// Returns false, leaving `html_lines` untouched, when `ip` is not such an underline, when
+// `ht` is neither 1 nor 2, or when there is no previous line that could become a header.
+pub fn handle_as_header(ip:&String, html_lines: &mut Vec<String>, ht:usize, bold_it: &mut bool, bold:&mut bool, italic:&mut bool) -> bool {
+    // index 0 -> h1 ('='), index 1 -> h2 ('-')
+    let ch = ['=', '-'];
+    let underline = match ht.checked_sub(1).and_then(|idx| ch.get(idx)) {
+        Some(c) => *c,
+        None => return false,
+    };
+    if !only(ip, underline) {
+        return false;
+    }
+    // pop() yields None when nothing was emitted yet, e.g. the file starts with "----"
+    match html_lines.pop() {
+        Some(last_line) => {
+            let header = format_header(last_line, ht, ht, bold_it, bold, italic);
+            html_lines.push(header);
+            true
+        }
+        None => false,
+    }
+}
+
+// Builds the HTML of a level `c` header out of `s`.
+// The first `c - d` bytes of `s` are skipped (the '#' markers when `d` is 0, nothing when
+// `d` equals `c`), the remainder is trimmed and passed through `interpret_line`, and the
+// result is wrapped in "<hc>" and "</hc>" tags that each sit on their own line.
+pub fn format_header(s:String, c:usize, d:usize, bold_it:&mut bool, bold:&mut bool, italic:&mut bool) -> String {
+    let title = s[(c - d)..].trim();
+    let body = interpret_line(title, bold_it, bold, italic);
+    format!("\n<h{}>\n{}\n</h{}>\n", c, body, c)
+}
+
+// tells whether the string is non-empty and made up exclusively of the given character
+fn only(s:&String, c:char) -> bool
+{
+    !s.is_empty() && s.chars().all(|sc| sc == c)
+}
+
+// tells whether the line begins with at least `c` consecutive '#' characters
+pub fn header(s:&str, c:usize) -> bool {
+    let bytes = s.as_bytes();
+    // '#' is a single byte, so the comparison can be done on the raw bytes
+    bytes.len() >= c && bytes[..c].iter().all(|&b| b == b'#')
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/list/mod.rs	Thu Feb 16 15:22:52 2023 +0100
@@ -0,0 +1,113 @@
+// Tracks the nesting of a list through the indentation of `orig_ip`.
+// The indentation unit is four spaces when the line starts with four spaces, a tab
+// otherwise. The level starts at `i` for the first unit and grows by one for every
+// further unit. A level deeper than `olist_space_count` pushes `open_tag` and increments
+// `olist_count`; a shallower level pushes `close_tag` and decrements it. The level is
+// then stored in `olist_space_count`.
+pub fn deal_with_list_nesting(orig_ip: &String, olist_space_count: &mut i8, olist_count: &mut i8, html_lines: &mut Vec<String>, i: i8, open_tag: &str, close_tag: &str) {
+    let unit = if orig_ip.starts_with("    ") { "    " } else { "\t" };
+
+    // the first unit is accounted for by `i`, only the following ones are counted
+    let mut remainder = &orig_ip[unit.len()..];
+    let mut level: i8 = i;
+    while let Some(tail) = remainder.strip_prefix(unit) {
+        remainder = tail;
+        level += 1;
+    }
+
+    if *olist_space_count < level {
+        *olist_count += 1;
+        html_lines.push(open_tag.to_string());
+    } else if *olist_space_count > level {
+        *olist_count -= 1;
+        html_lines.push(close_tag.to_string());
+    }
+    *olist_space_count = level;
+}
+
+// Pushes one `close_tag` per list that is still open according to `list_nest_count`,
+// leaves that counter at zero (a non-positive counter is not touched) and resets
+// `list_space_count` to zero.
+pub fn close_opened_lists(html_lines: &mut Vec<String>, list_nest_count: &mut i8, list_space_count:  &mut i8, close_tag: &str) {
+    println!("For {} nestcount={}",close_tag, list_nest_count);
+    // the range is empty when the counter is zero or negative
+    for _ in 0..*list_nest_count {
+        html_lines.push(close_tag.to_string());
+    }
+    *list_nest_count = (*list_nest_count).min(0);
+    *list_space_count = 0;
+}
+
+// Deals with one kind of list (ordered or unordered) for the current line.
+//
+// html_lines         - the output; list tags are pushed onto it
+// list_nest_count    - number of currently open lists of this kind
+// list_space_count   - indentation level recorded for the previous list line
+// orig_ip            - the untrimmed line, used to detect indentation
+// tip                - the trimmed line, inspected for the list marker
+// list_start_checker - tells whether the character at the given index of the given string
+//                      belongs to a list marker; it is only called with indices inside `tip`
+// list_delim_char    - the character that has to follow the marker; '.' selects the
+//                      ordered list rules
+//
+// A line is a list entry when it starts with marker characters directly followed by
+// `list_delim_char`. The first list is opened by an unordered entry, or by an ordered
+// entry numbered 1, while no list is open; every other entry goes through
+// `resolve_list_entry`. Any line that is not a list entry closes all open lists.
+pub fn deal_with_list<C>(html_lines: &mut Vec<String>, list_nest_count: &mut i8,
+                     list_space_count: &mut i8, orig_ip: &String,
+                     tip:&str, list_start_checker: C,
+                     open_tag: &str, close_tag: &str, list_delim_char: char)
+where C: Fn(&str, isize) -> bool
+{
+    let tip_len = tip.chars().count();
+    // Guards the checker against indices past the end of `tip`, which happen for
+    // empty lines and for lines made up of marker characters only.
+    let is_marker_at = |idx: usize| -> bool {
+        idx < tip_len && isize::try_from(idx).map_or(false, |pos| list_start_checker(tip, pos))
+    };
+
+    if !is_marker_at(0) {
+        close_opened_lists(html_lines, list_nest_count, list_space_count, close_tag);
+        return;
+    }
+
+    println!("list start: {}", tip);
+    let mut cidxc: usize = 0;
+    while is_marker_at(cidxc) {
+        println!("list start: {}, {}", tip, cidxc);
+        cidxc += 1;
+    }
+
+    // the marker has to be followed by the delimiter, otherwise this is ordinary text
+    if tip.chars().nth(cidxc) != Some(list_delim_char) {
+        close_opened_lists(html_lines, list_nest_count, list_space_count, close_tag);
+        return;
+    }
+
+    // this is indeed starting a list
+    let may_open_first_list = if list_delim_char == '.' {
+        // ordered list: only an entry numbered 1 opens the very first list; a number
+        // that does not fit an i32 is handled like any other number different from 1
+        let number: String = tip.chars().take(cidxc).collect();
+        matches!(number.parse::<i32>(), Ok(1))
+    } else {
+        // unordered list
+        true
+    };
+
+    if may_open_first_list && *list_nest_count == 0 {
+        // the very first list
+        *list_nest_count = 1;
+        html_lines.push(open_tag.to_string());
+    } else {
+        // see if the entry is nested, continues the list or slipped back
+        resolve_list_entry(html_lines, list_nest_count, list_space_count, orig_ip, open_tag, close_tag)
+    }
+}
+
+// Turns the interpreted line `inted` into a list entry while a list is open.
+//
+// Nothing happens when `list_counter` is zero. Otherwise "<li>" is put in front of
+// `inted`. When `inted` starts with a list marker, the marker is removed first: its
+// length is measured on `tip` (marker characters according to `list_start_checker`,
+// plus `sep_char` when it directly follows) and the remaining text is trimmed.
+// `list_start_checker` is never called with an index past the end of `tip`.
+pub fn insert_list_entry<C>(list_counter: &mut i8, tip: &str, inted: &mut String, list_start_checker:C, sep_char: char)
+where C: Fn(&str, isize) -> bool
+{
+    if *list_counter == 0 {
+        return;
+    }
+
+    if inted.is_empty() || !list_start_checker(inted.as_str(), 0) {
+        inted.insert_str(0, "<li>");
+        return;
+    }
+
+    let tip_len = tip.chars().count();
+    let mut cidxc: usize = 0;
+    while cidxc < tip_len
+        && isize::try_from(cidxc).map_or(false, |pos| list_start_checker(tip, pos))
+    {
+        cidxc += 1;
+    }
+    if tip.chars().nth(cidxc) == Some(sep_char) {
+        cidxc += 1;
+    }
+
+    // `cidxc` counts characters; translate it into a byte offset that is valid for `inted`
+    let skip = inted
+        .char_indices()
+        .nth(cidxc)
+        .map_or(inted.len(), |(offset, _)| offset);
+    *inted = format!("<li>{}", inted[skip..].trim());
+}
+
+// Resolves a list entry that does not open the very first list.
+// An entry indented with a tab or four spaces is handed to `deal_with_list_nesting`.
+// An entry without indentation closes every nested list, leaving at most one list open.
+pub fn resolve_list_entry(html_lines: &mut Vec<String>, olist_count: &mut i8, olist_space_count: &mut i8, orig_ip: &String, open_tag: &str, close_tag: &str)
+{
+    let indented = ["\t", "    "].iter().any(|&unit| orig_ip.starts_with(unit));
+    if indented {
+        deal_with_list_nesting(orig_ip, olist_space_count, olist_count, html_lines, 1, open_tag, close_tag);
+        return;
+    }
+
+    // maybe we slipped back to the outermost list: one close tag per nested list
+    for _ in 1..*olist_count {
+        html_lines.push(close_tag.to_string());
+    }
+    *olist_count = (*olist_count).min(1);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main.rs	Thu Feb 16 15:22:52 2023 +0100
@@ -0,0 +1,231 @@
+mod list;
+mod header;
+mod blockquote;
+
+use base64::{engine::general_purpose, Engine as _};
+
+use crate::list::close_opened_lists;
+use crate::list::deal_with_list;
+use crate::list::insert_list_entry;
+
+use crate::header::header;
+use crate::header::format_header;
+use crate::header::handle_as_header;
+
+use crate::blockquote::deal_with_blockquote;
+
+use std::{env, process};
+use std::fs::{File};
+use std::io::{self, BufRead, BufReader, Read, Write};
+use std::path::{Path, PathBuf};
+
+// Opens `filename` and hands back an iterator over its lines, read through a buffer.
+// The iterator is wrapped in an io::Result so the caller can match on a failed open.
+fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
+where P: AsRef<Path>, {
+    File::open(filename).map(|file| io::BufReader::new(file).lines())
+}
+
+// hands back the directory the process is currently running in
+fn get_current_working_dir() -> std::io::Result<PathBuf> {
+    let cwd = env::current_dir()?;
+    Ok(cwd)
+}
+
+//
+// Converts one line of markdown into HTML.
+//
+// p       - the markdown line, surrounding whitespace is ignored
+// bold_it - whether a bold_italic was started (true) or not (false), goes out, comes in
+// bold    - whether the bold tag was activated or not
+// italic  - whether the italic tag was activated or not
+//
+// A line starting with one to six '#' characters is returned as a header. For any other
+// line the "***", "**" and "*" markers are replaced by their HTML tags and every
+// ![alt](path) image is embedded as a base64 "data:" URI read from `path`.
+//
+// Terminates the process with exit code 1 on a malformed image tag and panics when an
+// image file cannot be opened or read.
+fn interpret_line(p :&str, bold_it:&mut bool, bold:&mut bool, italic:&mut bool) -> String
+{
+    let s = p.trim();
+
+    // first check: is this a header? The longest marker is tried first so that "###"
+    // is not mistaken for "#".
+    let hsize = [6,5,4,3,2,1];
+    for hs in hsize {
+        if header(s, hs) {
+            return format_header(s.to_string(), hs, 0, bold_it, bold, italic);
+        }
+    }
+
+    let mut t = s.to_string();
+
+    // the closing tags mirror the opening ones so that the elements nest properly
+    handle_tag(&mut t, bold_it, "***", "<strong><em>", "</em></strong>");
+    handle_tag(&mut t, bold, "**", "<strong>", "</strong>");
+    handle_tag(&mut t, italic, "*", "<em>", "</em>");
+
+    // images: `res` collects the converted text, `rest` is what still has to be scanned
+    let mut res = String::new();
+    let mut rest = t.as_str();
+    while let Some(imgsp) = rest.find("![") {
+        res.push_str(&rest[..imgsp]);
+
+        // "](" and ")" are searched after the "![" they belong to, so brackets and
+        // parentheses earlier on the line cannot be picked up by mistake
+        let after_open = &rest[imgsp + 2..];
+        let imgep = match after_open.find("](") {
+            Some(pos) => pos,
+            None => {
+                println!("Invalid image tag, missing close tag: {}", t);
+                // exit codes are truncated to 8 bits on Unix, 0x0100 would report success
+                process::exit(1);
+            }
+        };
+        let tag = &after_open[..imgep];
+
+        let after_alt = &after_open[imgep + 2..];
+        let closep = match after_alt.find(')') {
+            Some(pos) => pos,
+            None => {
+                println!("Invalid image tag, missing close parenthesis");
+                process::exit(1);
+            }
+        };
+        let imgpath = &after_alt[..closep];
+
+        // grab the image
+        let f = File::open(imgpath).expect("Not found image");
+        let mut reader = BufReader::new(f);
+        let mut buffer = Vec::new();
+
+        // Read file into vector.
+        reader.read_to_end(&mut buffer).expect("Cannot read file");
+        let encoded = general_purpose::STANDARD.encode(&buffer);
+
+        // NOTE(review): the MIME type is always image/png, whatever the file contains
+        res.push_str("<img src=\"data:image/png;base64, ");
+        res.push_str(&encoded);
+        res.push_str("\" alt=\"");
+        res.push_str(tag);
+        res.push_str("\">");
+
+        rest = &after_alt[closep + 1..];
+    }
+    res.push_str(rest);
+    res
+}
+
+// Replaces every occurrence of the markdown marker `mdtag` in `t`, from left to right.
+// `mdtag_flag` tells whether the tag is currently open: an occurrence found while it is
+// false becomes `open_tag`, one found while it is true becomes `close_tag`, and the flag
+// is flipped each time. The flag is left in its final state for the following lines.
+fn handle_tag(t: &mut String, mdtag_flag: &mut bool, mdtag: &str, open_tag: &str, close_tag: &str) {
+    while t.contains(mdtag) {
+        let html_tag = if *mdtag_flag { close_tag } else { open_tag };
+        *t = t.replacen(mdtag, html_tag, 1);
+        *mdtag_flag = !*mdtag_flag;
+    }
+}
+
+// Hands back `s` with the last occurrence of `w` (what) replaced by `ww` (with what).
+// `s` is returned unchanged when it does not contain `w`.
+fn replace_last(s:String, w:String, ww:String) -> String {
+    match s.rfind(w.as_str()) {
+        Some(at) => {
+            let mut replaced = s;
+            replaced.replace_range(at..at + w.len(), &ww);
+            replaced
+        }
+        None => s,
+    }
+}
+
+// Closes every ordered and unordered list that is still open and then pushes a "<p>".
+// Called for empty lines (lists usually end with one) and once after the last line.
+fn close_opened_tags(html_lines: &mut Vec<String>, olist_count: &mut i8, olist_space_count: &mut i8, ulist_count: &mut i8, ulist_space_count: &mut i8)
+{
+    let open_lists = [
+        (olist_count, olist_space_count, "</ol>"),
+        (ulist_count, ulist_space_count, "</ul>"),
+    ];
+    for (count, space_count, close_tag) in open_lists {
+        close_opened_lists(html_lines, count, space_count, close_tag);
+    }
+    html_lines.push(String::from("<p>"));
+}
+
+//
+// Main
+//
+// Reads "amalgamated.md" from the current working directory, converts it line by line
+// and writes the resulting HTML to "blaa.html".
+//
+fn main() {
+    let cwd = get_current_working_dir().expect("Cannot determine the current working directory");
+    // joining paths (instead of concatenating strings) also works for non UTF-8 directories
+    let filename = cwd.join("amalgamated.md");
+
+    // The input file must exist in the current path before this produces output
+    let lines = match read_lines(&filename) {
+        Ok(lines) => lines,
+        Err(_) => {
+            println!("{} not found", filename.display());
+            return;
+        }
+    };
+
+    // will be written to the html file
+    let mut html_lines:Vec<String> = Vec::new();
+    let mut bold_it:bool = false;    // whether the bold_italic tag was activated or not
+    let mut bold : bool = false;     // whether the bold tag was activated or not
+    let mut italic : bool = false;   // whether the italic tag was activated or not
+    let mut blockquote : i8 = 0;     // the nesting level of the blockquotes, 0 when none is open
+    let mut olist_count : i8 = 0;    // whether we have an ordered list started, also counts the nested ones
+    let mut olist_space_count:i8 = 0;// the previous stage of the ordered lists
+    let mut ulist_count : i8 = 0;    // whether we have an unordered list started, also counts the nested ones
+    let mut ulist_space_count:i8 = 0;// the previous stage of the unordered lists
+
+    // ordered list checker: the character at `idx` is a digit.
+    // An index outside of the line is simply "not a marker" instead of a panic.
+    let ol_start_checker = |tip: &str, idx: isize | -> bool {
+        usize::try_from(idx)
+            .ok()
+            .and_then(|pos| tip.chars().nth(pos))
+            .map_or(false, |c| c.is_numeric())
+    };
+
+    // unordered list checker: the character at `idx` is a bullet; index 2 never is,
+    // which limits the marker to two characters
+    let ul_start_checker = |tip: &str, idx: isize | -> bool {
+        if idx == 2 {
+            return false;
+        }
+        let c = usize::try_from(idx).ok().and_then(|pos| tip.chars().nth(pos));
+        matches!(c, Some('*') | Some('-') | Some('+'))
+    };
+
+    // create the html lines
+    for line in lines {
+        // lines that cannot be read are skipped
+        let mut cline:String = match line {
+            Ok(lcheck) => lcheck,
+            Err(_) => continue,
+        };
+        // `cline` loses its blockquote markers below, the list handling needs the original
+        let orig_ip = cline.clone();
+        let trimmed_line = orig_ip.trim();
+
+        if cline.is_empty() {
+            close_opened_tags(&mut html_lines, &mut olist_count, &mut olist_space_count, &mut ulist_count, &mut ulist_space_count);
+            continue;
+        }
+
+        // is this a blockquote?
+        deal_with_blockquote(&mut html_lines, &mut blockquote, &mut cline);
+
+        deal_with_list(&mut html_lines, &mut olist_count, &mut olist_space_count, &orig_ip, trimmed_line, ol_start_checker, "<ol>", "</ol>", '.');
+        deal_with_list(&mut html_lines, &mut ulist_count, &mut ulist_space_count, &orig_ip, trimmed_line, ul_start_checker, "<ul>", "</ul>", ' ');
+
+        // paragraph first check: is this a ===== line, denoting that the previous line was a h1 header
+        if handle_as_header(&cline, &mut html_lines, 1, &mut bold_it, &mut bold, &mut italic) {
+            continue;
+        }
+
+        // paragraph second check: is this a ---- line, denoting that the previous line was a h2 header
+        if !handle_as_header(&cline, &mut html_lines, 2, &mut bold_it, &mut bold, &mut italic) {
+            // it was not a header, must be a normal line
+            let mut inted = interpret_line(cline.as_str(), &mut bold_it, &mut bold, &mut italic) + "\n";
+            insert_list_entry(&mut olist_count, trimmed_line, &mut inted, ol_start_checker, '.');
+            insert_list_entry(&mut ulist_count, trimmed_line, &mut inted, ul_start_checker,' ');
+            html_lines.push(inted);
+        }
+    }
+
+    // closing all the opened tags
+    close_opened_tags(&mut html_lines, &mut olist_count, &mut olist_space_count, &mut ulist_count, &mut ulist_space_count);
+
+    // joining the lines
+    let mut full_html_line:String = html_lines.concat();
+
+    // did we encounter an unclosed bold_it, bold or italic? Turn its opening tag back
+    // into the markdown marker it came from
+    if bold_it {
+        full_html_line = replace_last(full_html_line, "<strong><em>".to_string(), "***".to_string());
+    }
+    if bold {
+        full_html_line = replace_last(full_html_line, "<strong>".to_string(), "**".to_string());
+    }
+    if italic {
+        full_html_line = replace_last(full_html_line, "<em>".to_string(), "*".to_string());
+    }
+
+    // create a html file
+    let mut file =  File::create("blaa.html").expect("Cannot create file");
+    file.write_all(full_html_line.as_bytes()).expect("Cannot write to file");
+}