diff --git a/Cargo.lock b/Cargo.lock index e6ed408..7416b7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -331,12 +331,6 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - [[package]] name = "libc" version = "0.2.155" @@ -355,16 +349,6 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" -[[package]] -name = "matter" -version = "0.1.0-alpha4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc16e839c57e0ad77957c42d39baab3692a1c6fa47692066470cddc24a5b0cd0" -dependencies = [ - "lazy_static", - "regex", -] - [[package]] name = "memchr" version = "2.7.4" @@ -379,7 +363,6 @@ dependencies = [ "filetime", "gumdrop", "ignore", - "matter", "pathdiff", "percent-encoding", "pretty_assertions", @@ -456,21 +439,28 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.9.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0" dependencies = [ "bitflags 2.6.0", "getopts", "memchr", + "pulldown-cmark-escape", "unicase", ] [[package]] -name = "pulldown-cmark-to-cmark" -version = "11.2.0" +name = "pulldown-cmark-escape" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd464f32d7631035e849fcd969a603e9bb17ceaebe8467352a7728147f34e42" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + +[[package]] +name = "pulldown-cmark-to-cmark" +version = "15.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c77db841443d89a57ae94f22d29c022f6d9f41b00bddbf1f4024dbaf4bdce1" dependencies = [ "pulldown-cmark", ] diff --git a/Cargo.toml b/Cargo.toml index 98234ab..1d23138 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,11 +28,10 @@ doc = false eyre = "0.6.12" gumdrop = "0.8.1" ignore = "0.4.22" -matter = "0.1.0-alpha4" pathdiff = "0.2.1" percent-encoding = "2.3.1" -pulldown-cmark = "0.9.3" -pulldown-cmark-to-cmark = "11.0.2" +pulldown-cmark = "0.11.0" +pulldown-cmark-to-cmark = "15.0.0" rayon = "1.10.0" regex = "1.10.5" serde_yaml = "0.9.34" diff --git a/changelog.d/14.breaking.md b/changelog.d/14.breaking.md new file mode 120000 index 0000000..1748068 --- /dev/null +++ b/changelog.d/14.breaking.md @@ -0,0 +1 @@ +252.breaking.md \ No newline at end of file diff --git a/changelog.d/14.fix.md b/changelog.d/14.fix.md new file mode 100644 index 0000000..6779103 --- /dev/null +++ b/changelog.d/14.fix.md @@ -0,0 +1,4 @@ +Don't escape square brackets in math expressions + +The upgrade to [pulldown-cmark](https://crates.io/crates/pulldown-cmark) 0.11 (see Backwards-incompatible Changes) includes official support for LaTeX-style math expressions. +With the markdown parser supporting this syntax natively, math expressions are now processed correctly without edge-cases. diff --git a/changelog.d/252.breaking.md b/changelog.d/252.breaking.md new file mode 100644 index 0000000..1e1fe55 --- /dev/null +++ b/changelog.d/252.breaking.md @@ -0,0 +1,12 @@ +Upgrade [pulldown-cmark](https://crates.io/crates/pulldown-cmark) from 0.9 to 0.11 + +pulldown-cmark is the Markdown/CommonMark parser that is used to read and convert notes (together with [pulldown-cmark-to-cmark](https://crates.io/crates/pulldown-cmark-to-cmark)). + +For end-users that call the obsidian-export CLI this upgrade will be mostly transparent, except that Math blocks are now properly processed without getting mangled. + +People who use the library directly may face more significant breaking changes if they have custom postprocessors, as pulldown-cmark's events have gone through various breaking changes. +For more information, see: + +- +- +- diff --git a/changelog.d/252.fix.md b/changelog.d/252.fix.md new file mode 120000 index 0000000..56ead1e --- /dev/null +++ b/changelog.d/252.fix.md @@ -0,0 +1 @@ +14.fix.md \ No newline at end of file diff --git a/changelog.d/259.breaking.md b/changelog.d/259.breaking.md new file mode 120000 index 0000000..1748068 --- /dev/null +++ b/changelog.d/259.breaking.md @@ -0,0 +1 @@ +252.breaking.md \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 6a2d821..3dfbdbf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ use frontmatter::{frontmatter_from_str, frontmatter_to_str}; pub use frontmatter::{Frontmatter, FrontmatterStrategy}; use pathdiff::diff_paths; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; -use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag}; +use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag, TagEnd}; use pulldown_cmark_to_cmark::cmark_with_options; use rayon::prelude::*; use references::{ObsidianNoteReference, RefParser, RefParserState, RefType}; @@ -467,6 +467,7 @@ impl<'a> Exporter<'a> { #[allow(clippy::too_many_lines)] #[allow(clippy::panic_in_result_fn)] + #[allow(clippy::shadow_unrelated)] fn parse_obsidian_note<'b>( &self, path: &Path, @@ -478,23 +479,40 @@ impl<'a> Exporter<'a> { }); } let content = fs::read_to_string(path).context(ReadSnafu { path })?; - let (frontmatter, content) = - matter::matter(&content).unwrap_or((String::new(), content.clone())); - let frontmatter = - frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?; + let mut frontmatter = String::new(); - let mut parser_options = Options::empty(); - parser_options.insert(Options::ENABLE_TABLES); - parser_options.insert(Options::ENABLE_FOOTNOTES); - parser_options.insert(Options::ENABLE_STRIKETHROUGH); - parser_options.insert(Options::ENABLE_TASKLISTS); + let parser_options = Options::ENABLE_TABLES + | Options::ENABLE_FOOTNOTES + | Options::ENABLE_STRIKETHROUGH + | Options::ENABLE_TASKLISTS + | Options::ENABLE_MATH + | Options::ENABLE_YAML_STYLE_METADATA_BLOCKS; let mut ref_parser = RefParser::new(); let mut events = vec![]; // Most of the time, a reference triggers 5 events: [ or ![, [, , ], ] let mut buffer = Vec::with_capacity(5); - for event in Parser::new_ext(&content, parser_options) { + let mut parser = Parser::new_ext(&content, parser_options); + 'outer: while let Some(event) = parser.next() { + // When encountering a metadata block (frontmatter), collect all events until getting + // to the end of the block, at which point the nested loop will break out to the outer + // loop again. + if matches!(event, Event::Start(Tag::MetadataBlock(_kind))) { + for event in parser.by_ref() { + match event { + Event::Text(cowstr) => frontmatter.push_str(&cowstr), + Event::End(TagEnd::MetadataBlock(_kind)) => { + continue 'outer; + }, + _ => panic!( + "Encountered an unexpected event while processing frontmatter in {}. Please report this as a bug with a copy of the note contents and this text: \n\nEvent: {:?}\n", + path.display(), + event + ), + } + } + } if ref_parser.state == RefParserState::Resetting { events.append(&mut buffer); buffer.clear(); @@ -583,8 +601,9 @@ impl<'a> Exporter<'a> { if !buffer.is_empty() { events.append(&mut buffer); } + Ok(( - frontmatter, + frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?, events.into_iter().map(event_to_owned).collect(), )) } @@ -662,20 +681,18 @@ impl<'a> Exporter<'a> { // into an image reference instead. Slightly hacky, but avoids needing // to keep another utility function around for this, or introducing an // extra parameter on make_link_to_file. - Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => { - Event::Start(Tag::Image( - linktype, - CowStr::from(cowstr1.into_string()), - CowStr::from(cowstr2.into_string()), - )) - } - Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => { - Event::End(Tag::Image( - linktype, - CowStr::from(cowstr1.into_string()), - CowStr::from(cowstr2.into_string()), - )) - } + Event::Start(Tag::Link { + link_type, + dest_url, + title, + id, + }) => Event::Start(Tag::Image { + link_type, + dest_url: CowStr::from(dest_url.into_string()), + title: CowStr::from(title.into_string()), + id: CowStr::from(id.into_string()), + }), + Event::End(TagEnd::Link) => Event::End(TagEnd::Image), _ => event, }) .collect() @@ -707,7 +724,7 @@ impl<'a> Exporter<'a> { return vec![ Event::Start(Tag::Emphasis), Event::Text(CowStr::from(reference.display())), - Event::End(Tag::Emphasis), + Event::End(TagEnd::Emphasis), ]; } let target_file = target_file.unwrap(); @@ -731,16 +748,17 @@ impl<'a> Exporter<'a> { link.push_str(&slugify(section)); } - let link_tag = Tag::Link( - pulldown_cmark::LinkType::Inline, - CowStr::from(link), - CowStr::from(""), - ); + let link_tag = Tag::Link { + link_type: pulldown_cmark::LinkType::Inline, + dest_url: CowStr::from(link), + title: CowStr::from(""), + id: CowStr::from(""), + }; vec![ - Event::Start(link_tag.clone()), + Event::Start(link_tag), Event::Text(CowStr::from(reference.display())), - Event::End(link_tag.clone()), + Event::End(TagEnd::Link), ] } } @@ -841,8 +859,7 @@ fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownE for event in events { filtered_events.push(event.clone()); match event { - // FIXME: This should propagate fragment_identifier and classes. - Event::Start(Tag::Heading(level, _fragment_identifier, _classes)) => { + Event::Start(Tag::Heading { level, .. }) => { last_tag_was_heading = true; last_level = level; if currently_in_target_section && level <= section_level { @@ -881,10 +898,11 @@ fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownE fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> { match event { Event::Start(tag) => Event::Start(tag_to_owned(tag)), - Event::End(tag) => Event::End(tag_to_owned(tag)), + Event::End(tag) => Event::End(tag), Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())), Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())), Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())), + Event::InlineHtml(cowstr) => Event::InlineHtml(CowStr::from(cowstr.into_string())), Event::FootnoteReference(cowstr) => { Event::FootnoteReference(CowStr::from(cowstr.into_string())) } @@ -892,17 +910,37 @@ fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> { Event::HardBreak => Event::HardBreak, Event::Rule => Event::Rule, Event::TaskListMarker(checked) => Event::TaskListMarker(checked), + Event::InlineMath(cowstr) => Event::InlineMath(CowStr::from(cowstr.into_string())), + Event::DisplayMath(cowstr) => Event::DisplayMath(CowStr::from(cowstr.into_string())), } } fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> { match tag { Tag::Paragraph => Tag::Paragraph, - Tag::Heading(level, _fragment_identifier, _classes) => { - // FIXME: This should propagate fragment_identifier and classes. - Tag::Heading(level, None, Vec::new()) - } - Tag::BlockQuote => Tag::BlockQuote, + Tag::Heading { + level: heading_level, + id, + classes, + attrs, + } => Tag::Heading { + level: heading_level, + id: id.map(|cowstr| CowStr::from(cowstr.into_string())), + classes: classes + .into_iter() + .map(|cowstr| CowStr::from(cowstr.into_string())) + .collect(), + attrs: attrs + .into_iter() + .map(|(attr, value)| { + ( + CowStr::from(attr.into_string()), + value.map(|cowstr| CowStr::from(cowstr.into_string())), + ) + }) + .collect(), + }, + Tag::BlockQuote(blockquote_kind) => Tag::BlockQuote(blockquote_kind), Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)), Tag::List(optional) => Tag::List(optional), Tag::Item => Tag::Item, @@ -916,16 +954,30 @@ fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> { Tag::Emphasis => Tag::Emphasis, Tag::Strong => Tag::Strong, Tag::Strikethrough => Tag::Strikethrough, - Tag::Link(linktype, cowstr1, cowstr2) => Tag::Link( - linktype, - CowStr::from(cowstr1.into_string()), - CowStr::from(cowstr2.into_string()), - ), - Tag::Image(linktype, cowstr1, cowstr2) => Tag::Image( - linktype, - CowStr::from(cowstr1.into_string()), - CowStr::from(cowstr2.into_string()), - ), + Tag::Link { + link_type, + dest_url, + title, + id, + } => Tag::Link { + link_type, + dest_url: CowStr::from(dest_url.into_string()), + title: CowStr::from(title.into_string()), + id: CowStr::from(id.into_string()), + }, + Tag::Image { + link_type, + dest_url, + title, + id, + } => Tag::Image { + link_type, + dest_url: CowStr::from(dest_url.into_string()), + title: CowStr::from(title.into_string()), + id: CowStr::from(id.into_string()), + }, + Tag::HtmlBlock => Tag::HtmlBlock, + Tag::MetadataBlock(metadata_block_kind) => Tag::MetadataBlock(metadata_block_kind), } } diff --git a/tests/testdata/expected/main-samples/math.md b/tests/testdata/expected/main-samples/math.md new file mode 100644 index 0000000..5da3a9c --- /dev/null +++ b/tests/testdata/expected/main-samples/math.md @@ -0,0 +1,11 @@ +This sentence uses `$` delimiters to show math inline: $\sqrt{3x-1}+(1+x)^2$ + +This is the same math expression expressed as a block element: +$$\sqrt{3x-1}+(1+x)^2$$ + + + +With square brackets (inline): $[0, 2\pi]$ + +With square brackets (block): +$$[0, 2\pi]$$ diff --git a/tests/testdata/expected/main-samples/pure-markdown-examples.md b/tests/testdata/expected/main-samples/pure-markdown-examples.md index 0d94b45..54c7298 100644 --- a/tests/testdata/expected/main-samples/pure-markdown-examples.md +++ b/tests/testdata/expected/main-samples/pure-markdown-examples.md @@ -37,7 +37,7 @@ ~~Strikethrough~~ |Table|| -|-----|--| +|-----|-| |Foo|Bar| [link text](link-location.md) diff --git a/tests/testdata/input/main-samples/math.md b/tests/testdata/input/main-samples/math.md new file mode 100644 index 0000000..de6923e --- /dev/null +++ b/tests/testdata/input/main-samples/math.md @@ -0,0 +1,10 @@ +This sentence uses `$` delimiters to show math inline: $\sqrt{3x-1}+(1+x)^2$ + +This is the same math expression expressed as a block element: +$$\sqrt{3x-1}+(1+x)^2$$ + + +With square brackets (inline): $[0, 2\pi]$ + +With square brackets (block): +$$[0, 2\pi]$$