Merge pull request #276 from zoni/pulldown-cmark-0.11

Upgrade to pulldown-cmark 0.11
This commit is contained in:
Nick Groenen 2024-08-10 10:54:58 +02:00 committed by GitHub
commit 7916cf47e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 159 additions and 78 deletions

34
Cargo.lock generated
View File

@ -331,12 +331,6 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.155" version = "0.2.155"
@ -355,16 +349,6 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "matter"
version = "0.1.0-alpha4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc16e839c57e0ad77957c42d39baab3692a1c6fa47692066470cddc24a5b0cd0"
dependencies = [
"lazy_static",
"regex",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.4" version = "2.7.4"
@ -379,7 +363,6 @@ dependencies = [
"filetime", "filetime",
"gumdrop", "gumdrop",
"ignore", "ignore",
"matter",
"pathdiff", "pathdiff",
"percent-encoding", "percent-encoding",
"pretty_assertions", "pretty_assertions",
@ -456,21 +439,28 @@ dependencies = [
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.9.6" version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.6.0",
"getopts", "getopts",
"memchr", "memchr",
"pulldown-cmark-escape",
"unicase", "unicase",
] ]
[[package]] [[package]]
name = "pulldown-cmark-to-cmark" name = "pulldown-cmark-escape"
version = "11.2.0" version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dd464f32d7631035e849fcd969a603e9bb17ceaebe8467352a7728147f34e42" checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
[[package]]
name = "pulldown-cmark-to-cmark"
version = "15.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c77db841443d89a57ae94f22d29c022f6d9f41b00bddbf1f4024dbaf4bdce1"
dependencies = [ dependencies = [
"pulldown-cmark", "pulldown-cmark",
] ]

View File

@ -28,11 +28,10 @@ doc = false
eyre = "0.6.12" eyre = "0.6.12"
gumdrop = "0.8.1" gumdrop = "0.8.1"
ignore = "0.4.22" ignore = "0.4.22"
matter = "0.1.0-alpha4"
pathdiff = "0.2.1" pathdiff = "0.2.1"
percent-encoding = "2.3.1" percent-encoding = "2.3.1"
pulldown-cmark = "0.9.3" pulldown-cmark = "0.11.0"
pulldown-cmark-to-cmark = "11.0.2" pulldown-cmark-to-cmark = "15.0.0"
rayon = "1.10.0" rayon = "1.10.0"
regex = "1.10.5" regex = "1.10.5"
serde_yaml = "0.9.34" serde_yaml = "0.9.34"

1
changelog.d/14.breaking.md Symbolic link
View File

@ -0,0 +1 @@
252.breaking.md

4
changelog.d/14.fix.md Normal file
View File

@ -0,0 +1,4 @@
Don't escape square brackets in math expressions
The upgrade to [pulldown-cmark](https://crates.io/crates/pulldown-cmark) 0.11 (see Backwards-incompatible Changes) includes official support for LaTeX-style math expressions.
With the Markdown parser supporting this syntax natively, math expressions are now processed correctly without edge cases.

View File

@ -0,0 +1,12 @@
Upgrade [pulldown-cmark](https://crates.io/crates/pulldown-cmark) from 0.9 to 0.11
pulldown-cmark is the Markdown/CommonMark parser that is used to read and convert notes (together with [pulldown-cmark-to-cmark](https://crates.io/crates/pulldown-cmark-to-cmark)).
For end users who call the obsidian-export CLI, this upgrade will be mostly transparent, except that math blocks are now properly processed without getting mangled.
People who use the library directly may face more significant changes if they have custom postprocessors, as pulldown-cmark's events have gone through various breaking changes.
For more information, see:
- <https://github.com/zoni/obsidian-export/pull/252>
- <https://github.com/pulldown-cmark/pulldown-cmark/releases/tag/v0.10.0>
- <https://github.com/zoni/obsidian-export/pull/276/files#diff-b1a35a68f14e696205874893c07fd24fdb88882b47c23cc0e0c80a30c7d53759>

1
changelog.d/252.fix.md Symbolic link
View File

@ -0,0 +1 @@
14.fix.md

1
changelog.d/259.breaking.md Symbolic link
View File

@ -0,0 +1 @@
252.breaking.md

View File

@ -19,7 +19,7 @@ use frontmatter::{frontmatter_from_str, frontmatter_to_str};
pub use frontmatter::{Frontmatter, FrontmatterStrategy}; pub use frontmatter::{Frontmatter, FrontmatterStrategy};
use pathdiff::diff_paths; use pathdiff::diff_paths;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag}; use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use pulldown_cmark_to_cmark::cmark_with_options; use pulldown_cmark_to_cmark::cmark_with_options;
use rayon::prelude::*; use rayon::prelude::*;
use references::{ObsidianNoteReference, RefParser, RefParserState, RefType}; use references::{ObsidianNoteReference, RefParser, RefParserState, RefType};
@ -467,6 +467,7 @@ impl<'a> Exporter<'a> {
#[allow(clippy::too_many_lines)] #[allow(clippy::too_many_lines)]
#[allow(clippy::panic_in_result_fn)] #[allow(clippy::panic_in_result_fn)]
#[allow(clippy::shadow_unrelated)]
fn parse_obsidian_note<'b>( fn parse_obsidian_note<'b>(
&self, &self,
path: &Path, path: &Path,
@ -478,23 +479,40 @@ impl<'a> Exporter<'a> {
}); });
} }
let content = fs::read_to_string(path).context(ReadSnafu { path })?; let content = fs::read_to_string(path).context(ReadSnafu { path })?;
let (frontmatter, content) = let mut frontmatter = String::new();
matter::matter(&content).unwrap_or((String::new(), content.clone()));
let frontmatter =
frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?;
let mut parser_options = Options::empty(); let parser_options = Options::ENABLE_TABLES
parser_options.insert(Options::ENABLE_TABLES); | Options::ENABLE_FOOTNOTES
parser_options.insert(Options::ENABLE_FOOTNOTES); | Options::ENABLE_STRIKETHROUGH
parser_options.insert(Options::ENABLE_STRIKETHROUGH); | Options::ENABLE_TASKLISTS
parser_options.insert(Options::ENABLE_TASKLISTS); | Options::ENABLE_MATH
| Options::ENABLE_YAML_STYLE_METADATA_BLOCKS;
let mut ref_parser = RefParser::new(); let mut ref_parser = RefParser::new();
let mut events = vec![]; let mut events = vec![];
// Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ] // Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
let mut buffer = Vec::with_capacity(5); let mut buffer = Vec::with_capacity(5);
for event in Parser::new_ext(&content, parser_options) { let mut parser = Parser::new_ext(&content, parser_options);
'outer: while let Some(event) = parser.next() {
// When encountering a metadata block (frontmatter), collect all events until getting
// to the end of the block, at which point the nested loop will break out to the outer
// loop again.
if matches!(event, Event::Start(Tag::MetadataBlock(_kind))) {
for event in parser.by_ref() {
match event {
Event::Text(cowstr) => frontmatter.push_str(&cowstr),
Event::End(TagEnd::MetadataBlock(_kind)) => {
continue 'outer;
},
_ => panic!(
"Encountered an unexpected event while processing frontmatter in {}. Please report this as a bug with a copy of the note contents and this text: \n\nEvent: {:?}\n",
path.display(),
event
),
}
}
}
if ref_parser.state == RefParserState::Resetting { if ref_parser.state == RefParserState::Resetting {
events.append(&mut buffer); events.append(&mut buffer);
buffer.clear(); buffer.clear();
@ -583,8 +601,9 @@ impl<'a> Exporter<'a> {
if !buffer.is_empty() { if !buffer.is_empty() {
events.append(&mut buffer); events.append(&mut buffer);
} }
Ok(( Ok((
frontmatter, frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?,
events.into_iter().map(event_to_owned).collect(), events.into_iter().map(event_to_owned).collect(),
)) ))
} }
@ -662,20 +681,18 @@ impl<'a> Exporter<'a> {
// into an image reference instead. Slightly hacky, but avoids needing // into an image reference instead. Slightly hacky, but avoids needing
// to keep another utility function around for this, or introducing an // to keep another utility function around for this, or introducing an
// extra parameter on make_link_to_file. // extra parameter on make_link_to_file.
Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => { Event::Start(Tag::Link {
Event::Start(Tag::Image( link_type,
linktype, dest_url,
CowStr::from(cowstr1.into_string()), title,
CowStr::from(cowstr2.into_string()), id,
)) }) => Event::Start(Tag::Image {
} link_type,
Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => { dest_url: CowStr::from(dest_url.into_string()),
Event::End(Tag::Image( title: CowStr::from(title.into_string()),
linktype, id: CowStr::from(id.into_string()),
CowStr::from(cowstr1.into_string()), }),
CowStr::from(cowstr2.into_string()), Event::End(TagEnd::Link) => Event::End(TagEnd::Image),
))
}
_ => event, _ => event,
}) })
.collect() .collect()
@ -707,7 +724,7 @@ impl<'a> Exporter<'a> {
return vec![ return vec![
Event::Start(Tag::Emphasis), Event::Start(Tag::Emphasis),
Event::Text(CowStr::from(reference.display())), Event::Text(CowStr::from(reference.display())),
Event::End(Tag::Emphasis), Event::End(TagEnd::Emphasis),
]; ];
} }
let target_file = target_file.unwrap(); let target_file = target_file.unwrap();
@ -731,16 +748,17 @@ impl<'a> Exporter<'a> {
link.push_str(&slugify(section)); link.push_str(&slugify(section));
} }
let link_tag = Tag::Link( let link_tag = Tag::Link {
pulldown_cmark::LinkType::Inline, link_type: pulldown_cmark::LinkType::Inline,
CowStr::from(link), dest_url: CowStr::from(link),
CowStr::from(""), title: CowStr::from(""),
); id: CowStr::from(""),
};
vec![ vec![
Event::Start(link_tag.clone()), Event::Start(link_tag),
Event::Text(CowStr::from(reference.display())), Event::Text(CowStr::from(reference.display())),
Event::End(link_tag.clone()), Event::End(TagEnd::Link),
] ]
} }
} }
@ -841,8 +859,7 @@ fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownE
for event in events { for event in events {
filtered_events.push(event.clone()); filtered_events.push(event.clone());
match event { match event {
// FIXME: This should propagate fragment_identifier and classes. Event::Start(Tag::Heading { level, .. }) => {
Event::Start(Tag::Heading(level, _fragment_identifier, _classes)) => {
last_tag_was_heading = true; last_tag_was_heading = true;
last_level = level; last_level = level;
if currently_in_target_section && level <= section_level { if currently_in_target_section && level <= section_level {
@ -881,10 +898,11 @@ fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownE
fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> { fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> {
match event { match event {
Event::Start(tag) => Event::Start(tag_to_owned(tag)), Event::Start(tag) => Event::Start(tag_to_owned(tag)),
Event::End(tag) => Event::End(tag_to_owned(tag)), Event::End(tag) => Event::End(tag),
Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())), Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())),
Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())), Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())),
Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())), Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())),
Event::InlineHtml(cowstr) => Event::InlineHtml(CowStr::from(cowstr.into_string())),
Event::FootnoteReference(cowstr) => { Event::FootnoteReference(cowstr) => {
Event::FootnoteReference(CowStr::from(cowstr.into_string())) Event::FootnoteReference(CowStr::from(cowstr.into_string()))
} }
@ -892,17 +910,37 @@ fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> {
Event::HardBreak => Event::HardBreak, Event::HardBreak => Event::HardBreak,
Event::Rule => Event::Rule, Event::Rule => Event::Rule,
Event::TaskListMarker(checked) => Event::TaskListMarker(checked), Event::TaskListMarker(checked) => Event::TaskListMarker(checked),
Event::InlineMath(cowstr) => Event::InlineMath(CowStr::from(cowstr.into_string())),
Event::DisplayMath(cowstr) => Event::DisplayMath(CowStr::from(cowstr.into_string())),
} }
} }
fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> { fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> {
match tag { match tag {
Tag::Paragraph => Tag::Paragraph, Tag::Paragraph => Tag::Paragraph,
Tag::Heading(level, _fragment_identifier, _classes) => { Tag::Heading {
// FIXME: This should propagate fragment_identifier and classes. level: heading_level,
Tag::Heading(level, None, Vec::new()) id,
} classes,
Tag::BlockQuote => Tag::BlockQuote, attrs,
} => Tag::Heading {
level: heading_level,
id: id.map(|cowstr| CowStr::from(cowstr.into_string())),
classes: classes
.into_iter()
.map(|cowstr| CowStr::from(cowstr.into_string()))
.collect(),
attrs: attrs
.into_iter()
.map(|(attr, value)| {
(
CowStr::from(attr.into_string()),
value.map(|cowstr| CowStr::from(cowstr.into_string())),
)
})
.collect(),
},
Tag::BlockQuote(blockquote_kind) => Tag::BlockQuote(blockquote_kind),
Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)), Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)),
Tag::List(optional) => Tag::List(optional), Tag::List(optional) => Tag::List(optional),
Tag::Item => Tag::Item, Tag::Item => Tag::Item,
@ -916,16 +954,30 @@ fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> {
Tag::Emphasis => Tag::Emphasis, Tag::Emphasis => Tag::Emphasis,
Tag::Strong => Tag::Strong, Tag::Strong => Tag::Strong,
Tag::Strikethrough => Tag::Strikethrough, Tag::Strikethrough => Tag::Strikethrough,
Tag::Link(linktype, cowstr1, cowstr2) => Tag::Link( Tag::Link {
linktype, link_type,
CowStr::from(cowstr1.into_string()), dest_url,
CowStr::from(cowstr2.into_string()), title,
), id,
Tag::Image(linktype, cowstr1, cowstr2) => Tag::Image( } => Tag::Link {
linktype, link_type,
CowStr::from(cowstr1.into_string()), dest_url: CowStr::from(dest_url.into_string()),
CowStr::from(cowstr2.into_string()), title: CowStr::from(title.into_string()),
), id: CowStr::from(id.into_string()),
},
Tag::Image {
link_type,
dest_url,
title,
id,
} => Tag::Image {
link_type,
dest_url: CowStr::from(dest_url.into_string()),
title: CowStr::from(title.into_string()),
id: CowStr::from(id.into_string()),
},
Tag::HtmlBlock => Tag::HtmlBlock,
Tag::MetadataBlock(metadata_block_kind) => Tag::MetadataBlock(metadata_block_kind),
} }
} }

View File

@ -0,0 +1,11 @@
This sentence uses `$` delimiters to show math inline: $\sqrt{3x-1}+(1+x)^2$
This is the same math expression expressed as a block element:
$$\sqrt{3x-1}+(1+x)^2$$
<!-- https://github.com/zoni/obsidian-export/issues/14 -->
With square brackets (inline): $[0, 2\pi]$
With square brackets (block):
$$[0, 2\pi]$$

View File

@ -37,7 +37,7 @@
~~Strikethrough~~ ~~Strikethrough~~
|Table|| |Table||
|-----|--| |-----|-|
|Foo|Bar| |Foo|Bar|
[link text](link-location.md) [link text](link-location.md)

View File

@ -0,0 +1,10 @@
This sentence uses `$` delimiters to show math inline: $\sqrt{3x-1}+(1+x)^2$
This is the same math expression expressed as a block element:
$$\sqrt{3x-1}+(1+x)^2$$
<!-- https://github.com/zoni/obsidian-export/issues/14 -->
With square brackets (inline): $[0, 2\pi]$
With square brackets (block):
$$[0, 2\pi]$$