new: postprocessing support

Add support for postprocessing of Markdown prior to writing converted
notes to disk.

When using Obsidian export as a Rust library, postprocessors may be
used to do the following:

1. Modify a note's `Context`, for example to change the destination
   filename or update its Frontmatter.
2. Change a note's contents by altering `MarkdownEvents`.
3. Prevent later postprocessors from running or cause a note to be
   skipped entirely.

Future releases of Obsidian export may come with built-in postprocessors
for users of the command-line tool to use, if general use-cases can be
identified.

For example, a future release might include functionality to make notes
more suitable for the Hugo static site generator. This functionality
would be implemented as a postprocessor that could be enabled through
command-line flags.
This commit is contained in:
Nick Groenen 2021-02-20 21:35:45 +01:00
parent f0dd6f7132
commit 58eb79e53d
No known key found for this signature in database
GPG Key ID: 4F0AD019928AE098
9 changed files with 779 additions and 334 deletions

40
Cargo.lock generated
View File

@ -125,6 +125,12 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "dtoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d7ed2934d741c6b37e33e3832298e8850b53fd2d2bea03873375596c7cea4e"
[[package]] [[package]]
name = "either" name = "either"
version = "1.6.1" version = "1.6.1"
@ -245,6 +251,12 @@ version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"
[[package]]
name = "linked-hash-map"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.14" version = "0.4.14"
@ -305,6 +317,7 @@ dependencies = [
"pulldown-cmark-to-cmark", "pulldown-cmark-to-cmark",
"rayon", "rayon",
"regex", "regex",
"serde_yaml",
"slug", "slug",
"snafu", "snafu",
"tempfile", "tempfile",
@ -511,6 +524,24 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
[[package]]
name = "serde_yaml"
version = "0.8.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15654ed4ab61726bf918a39cb8d98a2e2995b002387807fa6ba58fdf7f59bb23"
dependencies = [
"dtoa",
"linked-hash-map",
"serde",
"yaml-rust",
]
[[package]] [[package]]
name = "slug" name = "slug"
version = "0.1.4" version = "0.1.4"
@ -649,3 +680,12 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]

View File

@ -35,6 +35,7 @@ pulldown-cmark = "0.8.0"
pulldown-cmark-to-cmark = "6.0.0" pulldown-cmark-to-cmark = "6.0.0"
rayon = "1.5.0" rayon = "1.5.0"
regex = "1.4.3" regex = "1.4.3"
serde_yaml = "0.8.17"
slug = "0.1.4" slug = "0.1.4"
snafu = "0.6.10" snafu = "0.6.10"

93
src/context.rs Normal file
View File

@ -0,0 +1,93 @@
use crate::Frontmatter;
use std::path::PathBuf;
#[derive(Debug, Clone)]
/// Context holds metadata about a note which is being parsed.
///
/// This is used internally to keep track of nesting and help with constructing proper references
/// to other notes.
///
/// It is also passed to [postprocessors][crate::Postprocessor] to provide contextual information
/// and allow modification of a note's frontmatter.
pub struct Context {
/// The chain of files being parsed. The first element is the root note and the last element is
/// the file currently being processed; `from_parent` appends one entry per nested note.
file_tree: Vec<PathBuf>,
/// The path where this note will be written to when exported.
///
/// Changing this path will result in the note being written to that new path instead, but
/// beware: links will not be updated automatically. If this is changed by a
/// [postprocessor][crate::Postprocessor], it's up to that postprocessor to rewrite any
/// existing links to this new path.
pub destination: PathBuf,
/// The [Frontmatter] for this note. Frontmatter may be modified in-place (see
/// [serde_yaml::Mapping] for available methods) or replaced entirely.
///
/// # Example
///
/// Insert `foo: bar` into a note's frontmatter:
///
/// ```
/// # use obsidian_export::Frontmatter;
/// # use obsidian_export::Context;
/// # use std::path::PathBuf;
/// use obsidian_export::serde_yaml::Value;
///
/// # let mut context = Context::new(PathBuf::from("source"), PathBuf::from("destination"));
/// let key = Value::String("foo".to_string());
///
/// context.frontmatter.insert(
/// key.clone(),
/// Value::String("bar".to_string()),
/// );
/// ```
pub frontmatter: Frontmatter,
}
impl Context {
/// Create a new `Context`
pub fn new(src: PathBuf, dest: PathBuf) -> Context {
Context {
file_tree: vec![src],
destination: dest,
frontmatter: Frontmatter::new(),
}
}
/// Create a new `Context` which inherits from a parent Context.
pub fn from_parent(context: &Context, child: &PathBuf) -> Context {
let mut context = context.clone();
context.file_tree.push(child.to_path_buf());
context
}
/// Return the path of the file currently being parsed.
pub fn current_file(&self) -> &PathBuf {
self.file_tree
.last()
.expect("Context not initialized properly, file_tree is empty")
}
/// Return the path of the root file.
///
/// Typically this will yield the same element as `current_file`, but when a note is embedded
/// within another note, this will return the outer-most note.
pub fn root_file(&self) -> &PathBuf {
self.file_tree
.first()
.expect("Context not initialized properly, file_tree is empty")
}
/// Return the note depth (nesting level) for this context.
pub fn note_depth(&self) -> usize {
self.file_tree.len()
}
/// Return the list of files associated with this context.
///
/// The first element corresponds to the root file, the final element corresponds to the file
/// which is currently being processed (see also `current_file`).
pub fn file_tree(&self) -> Vec<PathBuf> {
self.file_tree.clone()
}
}

92
src/frontmatter.rs Normal file
View File

@ -0,0 +1,92 @@
use serde_yaml::Result;
/// YAML front matter from an Obsidian note.
///
/// This is a type alias for [serde_yaml::Mapping], so all the methods available on that type
/// are available with `Frontmatter` as well.
///
/// The `serde_yaml` crate is re-exported by this crate (as `obsidian_export::serde_yaml`), so
/// its `Value` type can be used without declaring a separate dependency.
///
/// # Examples
///
/// ```
/// # use obsidian_export::Frontmatter;
/// use serde_yaml::Value;
///
/// let mut frontmatter = Frontmatter::new();
/// let key = Value::String("foo".to_string());
///
/// frontmatter.insert(
/// key.clone(),
/// Value::String("bar".to_string()),
/// );
///
/// assert_eq!(
/// frontmatter.get(&key),
/// Some(&Value::String("bar".to_string())),
/// )
/// ```
pub type Frontmatter = serde_yaml::Mapping;
/// Parse the raw YAML text of a note's frontmatter into a [Frontmatter] mapping.
///
/// Empty or whitespace-only input yields an empty [Frontmatter] rather than an error.
///
/// # Errors
///
/// Returns the underlying [serde_yaml::Error] when `s` cannot be deserialized into a YAML
/// mapping.
pub fn frontmatter_from_str(mut s: &str) -> Result<Frontmatter> {
    // A YAML stream without any content can't be deserialized into a mapping, so substitute an
    // explicit empty mapping. Whitespace-only input (for example a frontmatter block that
    // contains nothing but a blank line) carries no content either and is treated the same way.
    if s.trim().is_empty() {
        s = "{}";
    }
    serde_yaml::from_str(s)
}
/// Render `frontmatter` as the text of a frontmatter header, terminated by a `---` line.
///
/// Empty frontmatter is rendered as a header without any content (`---\n---\n`). Otherwise the
/// YAML produced by [serde_yaml::to_string] is used, followed by the closing `---` line.
///
/// # Errors
///
/// Returns the underlying [serde_yaml::Error] when the frontmatter can't be serialized.
pub fn frontmatter_to_str(frontmatter: Frontmatter) -> Result<String> {
    let rendered = if frontmatter.is_empty() {
        "---\n---\n".to_string()
    } else {
        let mut yaml = serde_yaml::to_string(&frontmatter)?;
        yaml.push_str("---\n");
        yaml
    };
    Ok(rendered)
}
#[derive(Debug, Clone, Copy)]
/// Available strategies for the inclusion of frontmatter in notes.
///
/// The strategy decides whether a frontmatter header is written at the top of an exported note.
pub enum FrontmatterStrategy {
/// Only write frontmatter when the note's frontmatter is non-empty at the time the note is
/// written out. This includes frontmatter added by postprocessors.
Auto,
/// Always add frontmatter header, including empty frontmatter when none was originally
/// specified.
Always,
/// Never add any frontmatter to notes.
Never,
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_yaml::Value;

    /// Parsing an empty string gives an empty mapping instead of an error.
    #[test]
    fn empty_string_should_yield_empty_frontmatter() {
        let parsed = frontmatter_from_str("").unwrap();
        assert_eq!(parsed, Frontmatter::new());
    }

    /// Empty frontmatter renders as a header without any content.
    #[test]
    fn empty_frontmatter_to_str() {
        let rendered = frontmatter_to_str(Frontmatter::new()).unwrap();
        assert_eq!(rendered, String::from("---\n---\n"));
    }

    /// Non-empty frontmatter renders as YAML enclosed by `---` lines.
    #[test]
    fn nonempty_frontmatter_to_str() {
        let key = Value::String("foo".to_string());
        let value = Value::String("bar".to_string());
        let mut frontmatter = Frontmatter::new();
        frontmatter.insert(key, value);

        let rendered = frontmatter_to_str(frontmatter).unwrap();
        assert_eq!(rendered, String::from("---\nfoo: bar\n---\n"));
    }
}

View File

@ -1,16 +1,25 @@
pub extern crate pulldown_cmark;
pub extern crate serde_yaml;
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
mod context;
mod frontmatter;
mod references;
mod walker; mod walker;
pub use context::Context;
pub use frontmatter::{Frontmatter, FrontmatterStrategy};
pub use walker::{vault_contents, WalkOptions}; pub use walker::{vault_contents, WalkOptions};
use frontmatter::{frontmatter_from_str, frontmatter_to_str};
use pathdiff::diff_paths; use pathdiff::diff_paths;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag}; use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag};
use pulldown_cmark_to_cmark::cmark_with_options; use pulldown_cmark_to_cmark::cmark_with_options;
use rayon::prelude::*; use rayon::prelude::*;
use regex::Regex; use references::*;
use slug::slugify; use slug::slugify;
use snafu::{ResultExt, Snafu}; use snafu::{ResultExt, Snafu};
use std::ffi::OsString; use std::ffi::OsString;
@ -21,13 +30,98 @@ use std::io::ErrorKind;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str; use std::str;
type Result<T, E = ExportError> = std::result::Result<T, E>; /// A series of markdown [Event]s that are generated while traversing an Obsidian markdown note.
type MarkdownTree<'a> = Vec<Event<'a>>; pub type MarkdownEvents<'a> = Vec<Event<'a>>;
/// A post-processing function that is to be called after an Obsidian note has been fully parsed and
/// converted to regular markdown syntax.
///
/// Postprocessors are called in the order they've been added through [Exporter::add_postprocessor]
/// just before notes are written out to their final destination.
/// They may be used to achieve the following:
///
/// 1. Modify a note's [Context], for example to change the destination filename or update its [Frontmatter] (see [Context::frontmatter]).
/// 2. Change a note's contents by altering [MarkdownEvents].
/// 3. Prevent later postprocessors from running ([PostprocessorResult::StopHere]) or cause a note
/// to be skipped entirely ([PostprocessorResult::StopAndSkipNote]).
///
/// # Examples
///
/// ## Update frontmatter
///
/// This example shows how to make changes to a note's frontmatter. In this case, the postprocessor
/// is defined inline as a closure.
///
/// ```
/// use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
/// use obsidian_export::pulldown_cmark::{CowStr, Event};
/// use obsidian_export::serde_yaml::Value;
/// # use std::path::PathBuf;
/// # use tempfile::TempDir;
///
/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
/// # let destination = tmp_dir.path().to_path_buf();
/// let mut exporter = Exporter::new(source, destination);
///
/// // add_postprocessor registers a new postprocessor. In this example we use a closure.
/// exporter.add_postprocessor(&|mut context, events| {
/// // This is the key we'll insert into the frontmatter. In this case, the string "foo".
/// let key = Value::String("foo".to_string());
/// // This is the value we'll insert into the frontmatter. In this case, the string "baz".
/// let value = Value::String("baz".to_string());
///
/// // Frontmatter can be updated in-place, so we can call insert on it directly.
/// context.frontmatter.insert(key, value);
///
/// // Postprocessors must return their (modified) context, the markdown events that make
/// // up the note and a next action to take.
/// (context, events, PostprocessorResult::Continue)
/// });
///
/// exporter.run().unwrap();
/// ```
///
/// ## Change note contents
///
/// In this example a note's markdown content is changed by iterating over the [MarkdownEvents] and
/// changing the text when we encounter a [text element][Event::Text].
///
/// Instead of using a closure like above, this example shows how to use a separate function
/// definition.
///
/// ```
/// # use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
/// # use pulldown_cmark::{CowStr, Event};
/// # use std::path::PathBuf;
/// # use tempfile::TempDir;
/// #
/// /// This postprocessor replaces any instance of "foo" with "bar" in the note body.
/// fn foo_to_bar(
/// context: Context,
/// events: MarkdownEvents,
/// ) -> (Context, MarkdownEvents, PostprocessorResult) {
/// let events = events
/// .into_iter()
/// .map(|event| match event {
/// Event::Text(text) => Event::Text(CowStr::from(text.replace("foo", "bar"))),
/// event => event,
/// })
/// .collect();
/// (context, events, PostprocessorResult::Continue)
/// }
///
/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
/// # let destination = tmp_dir.path().to_path_buf();
/// # let mut exporter = Exporter::new(source, destination);
/// exporter.add_postprocessor(&foo_to_bar);
/// # exporter.run().unwrap();
/// ```
pub type Postprocessor =
dyn Fn(Context, MarkdownEvents) -> (Context, MarkdownEvents, PostprocessorResult) + Send + Sync;
type Result<T, E = ExportError> = std::result::Result<T, E>;
lazy_static! {
static ref OBSIDIAN_NOTE_LINK_RE: Regex =
Regex::new(r"^(?P<file>[^#|]+)??(#(?P<section>.+?))??(\|(?P<label>.+?))??$").unwrap();
}
const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?'); const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?');
const NOTE_RECURSION_LIMIT: usize = 10; const NOTE_RECURSION_LIMIT: usize = 10;
@ -80,21 +174,34 @@ pub enum ExportError {
#[snafu(source(from(ExportError, Box::new)))] #[snafu(source(from(ExportError, Box::new)))]
source: Box<ExportError>, source: Box<ExportError>,
}, },
#[snafu(display("Failed to decode YAML frontmatter in '{}'", path.display()))]
FrontMatterDecodeError {
path: PathBuf,
#[snafu(source(from(serde_yaml::Error, Box::new)))]
source: Box<serde_yaml::Error>,
},
#[snafu(display("Failed to encode YAML frontmatter for '{}'", path.display()))]
FrontMatterEncodeError {
path: PathBuf,
#[snafu(source(from(serde_yaml::Error, Box::new)))]
source: Box<serde_yaml::Error>,
},
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy, PartialEq)]
/// FrontmatterStrategy determines how frontmatter is handled in Markdown files. /// Emitted by [Postprocessor]s to signal the next action to take.
pub enum FrontmatterStrategy { pub enum PostprocessorResult {
/// Copy frontmatter when a note has frontmatter defined. /// Continue with the next post-processor (if any).
Auto, Continue,
/// Always add frontmatter header, including empty frontmatter when none was originally /// Use this note, but don't run any more post-processors after this one.
/// specified. StopHere,
Always, /// Skip this note (don't export it) and don't run any more post-processors.
/// Never add any frontmatter to notes. StopAndSkipNote,
Never,
} }
#[derive(Debug, Clone)] #[derive(Clone)]
/// Exporter provides the main interface to this library. /// Exporter provides the main interface to this library.
/// ///
/// Users are expected to create an Exporter using [`Exporter::new`], optionally followed by /// Users are expected to create an Exporter using [`Exporter::new`], optionally followed by
@ -108,164 +215,26 @@ pub struct Exporter<'a> {
vault_contents: Option<Vec<PathBuf>>, vault_contents: Option<Vec<PathBuf>>,
walk_options: WalkOptions<'a>, walk_options: WalkOptions<'a>,
process_embeds_recursively: bool, process_embeds_recursively: bool,
postprocessors: Vec<&'a Postprocessor>,
} }
#[derive(Debug, Clone)] impl<'a> fmt::Debug for Exporter<'a> {
/// Context holds parser metadata for the file/note currently being parsed. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
struct Context { f.debug_struct("WalkOptions")
file_tree: Vec<PathBuf>, .field("root", &self.root)
frontmatter_strategy: FrontmatterStrategy, .field("destination", &self.destination)
} .field("frontmatter_strategy", &self.frontmatter_strategy)
.field("vault_contents", &self.vault_contents)
#[derive(Debug, Clone, PartialEq)] .field("walk_options", &self.walk_options)
/// ObsidianNoteReference represents the structure of a `[[note]]` or `![[embed]]` reference. .field(
struct ObsidianNoteReference<'a> { "process_embeds_recursively",
/// The file (note name or partial path) being referenced. &self.process_embeds_recursively,
/// This will be None in the case that the reference is to a section within the same document )
file: Option<&'a str>, .field(
/// If specific, a specific section/heading being referenced. "postprocessors",
section: Option<&'a str>, &format!("<{} postprocessors active>", self.postprocessors.len()),
/// If specific, the custom label/text which was specified. )
label: Option<&'a str>, .finish()
}
#[derive(PartialEq)]
/// RefParserState enumerates all the possible parsing states [RefParser] may enter.
///
/// NOTE(review): the variants without their own doc comment appear to track progress through
/// the `[` / `![`, `[`, text, `]`, `]` sequence of a reference; the exact transitions should be
/// confirmed against the parsing loop.
enum RefParserState {
/// The state of a freshly created parser, and the state `RefParser::reset` returns to.
NoState,
ExpectSecondOpenBracket,
ExpectRefText,
ExpectRefTextOrCloseBracket,
ExpectFinalCloseBracket,
/// Signals that the parser is to be reset before the next event is handled.
Resetting,
}
/// RefType indicates whether a note reference is a link (`[[note]]`) or embed (`![[embed]]`).
enum RefType {
/// A link to another note: `[[note]]`.
Link,
/// An embed of another file: `![[embed]]`.
Embed,
}
/// RefParser holds state which is used to parse Obsidian WikiLinks (`[[note]]`, `![[embed]]`).
struct RefParser {
/// The parsing state this parser is currently in.
state: RefParserState,
/// The kind of reference being parsed. This is `None` on a freshly created or reset parser.
ref_type: Option<RefType>,
// References sometimes come in through multiple events. One example of this is when notes
// start with an underscore (_), presumably because this is also the literal which starts
// italic and bold text.
//
// ref_text concatenates the values from these partial events so that there's a fully-formed
// string to work with by the time the final `]]` is encountered.
ref_text: String,
}
impl RefParser {
    /// Create a parser in its initial condition: `NoState`, no reference type and empty text.
    fn new() -> RefParser {
        Self {
            ref_text: String::new(),
            ref_type: None,
            state: RefParserState::NoState,
        }
    }

    /// Move the parser into `new_state`.
    fn transition(&mut self, new_state: RefParserState) {
        self.state = new_state;
    }

    /// Return the parser to its initial condition, discarding any collected reference text.
    fn reset(&mut self) {
        self.ref_text.clear();
        self.ref_type = None;
        self.transition(RefParserState::NoState);
    }
}
impl Context {
/// Create a new `Context`
fn new(file: PathBuf) -> Context {
Context {
file_tree: vec![file],
frontmatter_strategy: FrontmatterStrategy::Auto,
}
}
/// Create a new `Context` which inherits from a parent Context.
fn from_parent(context: &Context, child: &PathBuf) -> Context {
let mut context = context.clone();
context.file_tree.push(child.to_path_buf());
context
}
/// Associate a new `FrontmatterStrategy` with this context.
fn set_frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Context {
self.frontmatter_strategy = strategy;
self
}
/// Return the path of the file currently being parsed.
fn current_file(&self) -> &PathBuf {
self.file_tree
.last()
.expect("Context not initialized properly, file_tree is empty")
}
/// Return the path of the root file.
///
/// Typically this will yield the same element as `current_file`, but when a note is embedded
/// within another note, this will return the outer-most note.
fn root_file(&self) -> &PathBuf {
self.file_tree
.first()
.expect("Context not initialized properly, file_tree is empty")
}
/// Return the note depth (nesting level) for this context.
fn note_depth(&self) -> usize {
self.file_tree.len()
}
/// Return the list of files associated with this context.
///
/// The first element corresponds to the root file, the final element corresponds to the file
/// which is currently being processed (see also `current_file`).
fn file_tree(&self) -> Vec<PathBuf> {
self.file_tree.clone()
}
}
impl<'a> ObsidianNoteReference<'a> {
    /// Split the text of a reference into its optional file, section and label parts.
    ///
    /// Panics when `text` isn't matched by the note link regex.
    fn from_str(text: &str) -> ObsidianNoteReference {
        let parts = OBSIDIAN_NOTE_LINK_RE
            .captures(text)
            .expect("note link regex didn't match - bad input?");

        ObsidianNoteReference {
            file: parts.name("file").map(|found| found.as_str()),
            label: parts.name("label").map(|found| found.as_str()),
            section: parts.name("section").map(|found| found.as_str()),
        }
    }

    /// Return the text produced by this reference's `Display` implementation.
    fn display(&self) -> String {
        self.to_string()
    }
}
impl<'a> fmt::Display for ObsidianNoteReference<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let label =
self.label
.map(|v| v.to_string())
.unwrap_or_else(|| match (self.file, self.section) {
(Some(file), Some(section)) => format!("{} > {}", file, section),
(Some(file), None) => file.to_string(),
(None, Some(section)) => section.to_string(),
_ => panic!("Reference exists without file or section!"),
});
write!(f, "{}", label)
} }
} }
@ -280,6 +249,7 @@ impl<'a> Exporter<'a> {
walk_options: WalkOptions::default(), walk_options: WalkOptions::default(),
process_embeds_recursively: true, process_embeds_recursively: true,
vault_contents: None, vault_contents: None,
postprocessors: vec![],
} }
} }
@ -308,6 +278,12 @@ impl<'a> Exporter<'a> {
self self
} }
/// Register `processor` as the last entry in the chain of [postprocessors][Postprocessor] that
/// are run on exported Obsidian Markdown notes.
///
/// Returns the exporter itself so that further calls can be chained.
pub fn add_postprocessor(&mut self, processor: &'a Postprocessor) -> &mut Exporter<'a> {
    let chain = &mut self.postprocessors;
    chain.push(processor);
    self
}
/// Export notes using the settings configured on this exporter. /// Export notes using the settings configured on this exporter.
pub fn run(&mut self) -> Result<()> { pub fn run(&mut self) -> Result<()> {
if !self.root.exists() { if !self.root.exists() {
@ -372,58 +348,64 @@ impl<'a> Exporter<'a> {
fn export_note(&self, src: &Path, dest: &Path) -> Result<()> { fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
match is_markdown_file(src) { match is_markdown_file(src) {
true => self.parse_and_export_obsidian_note(src, dest, self.frontmatter_strategy), true => self.parse_and_export_obsidian_note(src, dest),
false => copy_file(src, dest), false => copy_file(src, dest),
} }
.context(FileExportError { path: src }) .context(FileExportError { path: src })
} }
fn parse_and_export_obsidian_note( fn parse_and_export_obsidian_note(&self, src: &Path, dest: &Path) -> Result<()> {
&self, let mut context = Context::new(src.to_path_buf(), dest.to_path_buf());
src: &Path,
dest: &Path,
frontmatter_strategy: FrontmatterStrategy,
) -> Result<()> {
let content = fs::read_to_string(&src).context(ReadError { path: src })?;
let (mut frontmatter, _content) = let (frontmatter, mut markdown_events) = self.parse_obsidian_note(&src, &context)?;
matter::matter(&content).unwrap_or(("".to_string(), content.to_string())); context.frontmatter = frontmatter;
frontmatter = frontmatter.trim().to_string(); for func in &self.postprocessors {
//let mut outfile = create_file(&dest).context(FileIOError { filename: dest })?; let res = func(context, markdown_events);
context = res.0;
markdown_events = res.1;
match res.2 {
PostprocessorResult::StopHere => break,
PostprocessorResult::StopAndSkipNote => return Ok(()),
_ => (),
}
}
let dest = context.destination;
let mut outfile = create_file(&dest)?; let mut outfile = create_file(&dest)?;
let write_frontmatter = match self.frontmatter_strategy {
let write_frontmatter = match frontmatter_strategy {
FrontmatterStrategy::Always => true, FrontmatterStrategy::Always => true,
FrontmatterStrategy::Never => false, FrontmatterStrategy::Never => false,
FrontmatterStrategy::Auto => !frontmatter.is_empty(), FrontmatterStrategy::Auto => !context.frontmatter.is_empty(),
}; };
if write_frontmatter { if write_frontmatter {
if !frontmatter.is_empty() && !frontmatter.ends_with('\n') { let mut frontmatter_str = frontmatter_to_str(context.frontmatter)
frontmatter.push('\n'); .context(FrontMatterEncodeError { path: src })?;
} frontmatter_str.push('\n');
outfile outfile
.write_all(format!("---\n{}---\n\n", frontmatter).as_bytes()) .write_all(frontmatter_str.as_bytes())
.context(WriteError { path: &dest })?; .context(WriteError { path: &dest })?;
} }
let mut context = Context::new(src.to_path_buf());
context.set_frontmatter_strategy(frontmatter_strategy);
let markdown_tree = self.parse_obsidian_note(&src, &context)?;
outfile outfile
.write_all(render_mdtree_to_mdtext(markdown_tree).as_bytes()) .write_all(render_mdevents_to_mdtext(markdown_events).as_bytes())
.context(WriteError { path: &dest })?; .context(WriteError { path: &dest })?;
Ok(()) Ok(())
} }
fn parse_obsidian_note<'b>(&self, path: &Path, context: &Context) -> Result<MarkdownTree<'b>> { fn parse_obsidian_note<'b>(
&self,
path: &Path,
context: &Context,
) -> Result<(Frontmatter, MarkdownEvents<'b>)> {
if context.note_depth() > NOTE_RECURSION_LIMIT { if context.note_depth() > NOTE_RECURSION_LIMIT {
return Err(ExportError::RecursionLimitExceeded { return Err(ExportError::RecursionLimitExceeded {
file_tree: context.file_tree(), file_tree: context.file_tree(),
}); });
} }
let content = fs::read_to_string(&path).context(ReadError { path })?; let content = fs::read_to_string(&path).context(ReadError { path })?;
let (_frontmatter, content) = let (frontmatter, content) =
matter::matter(&content).unwrap_or(("".to_string(), content.to_string())); matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
let frontmatter =
frontmatter_from_str(&frontmatter).context(FrontMatterDecodeError { path })?;
let mut parser_options = Options::empty(); let mut parser_options = Options::empty();
parser_options.insert(Options::ENABLE_TABLES); parser_options.insert(Options::ENABLE_TABLES);
@ -432,13 +414,13 @@ impl<'a> Exporter<'a> {
parser_options.insert(Options::ENABLE_TASKLISTS); parser_options.insert(Options::ENABLE_TASKLISTS);
let mut ref_parser = RefParser::new(); let mut ref_parser = RefParser::new();
let mut tree = vec![]; let mut events = vec![];
// Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ] // Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
let mut buffer = Vec::with_capacity(5); let mut buffer = Vec::with_capacity(5);
for event in Parser::new_ext(&content, parser_options) { for event in Parser::new_ext(&content, parser_options) {
if ref_parser.state == RefParserState::Resetting { if ref_parser.state == RefParserState::Resetting {
tree.append(&mut buffer); events.append(&mut buffer);
buffer.clear(); buffer.clear();
ref_parser.reset(); ref_parser.reset();
} }
@ -455,7 +437,7 @@ impl<'a> Exporter<'a> {
ref_parser.transition(RefParserState::ExpectSecondOpenBracket); ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
} }
_ => { _ => {
tree.push(event); events.push(event);
buffer.clear(); buffer.clear();
}, },
}; };
@ -500,7 +482,7 @@ impl<'a> Exporter<'a> {
), ),
context, context,
); );
tree.append(&mut elements); events.append(&mut elements);
buffer.clear(); buffer.clear();
ref_parser.transition(RefParserState::Resetting); ref_parser.transition(RefParserState::Resetting);
} }
@ -509,7 +491,7 @@ impl<'a> Exporter<'a> {
ref_parser.ref_text.clone().as_ref(), ref_parser.ref_text.clone().as_ref(),
context context
)?; )?;
tree.append(&mut elements); events.append(&mut elements);
buffer.clear(); buffer.clear();
ref_parser.transition(RefParserState::Resetting); ref_parser.transition(RefParserState::Resetting);
} }
@ -523,9 +505,12 @@ impl<'a> Exporter<'a> {
} }
} }
if !buffer.is_empty() { if !buffer.is_empty() {
tree.append(&mut buffer); events.append(&mut buffer);
} }
Ok(tree.into_iter().map(event_to_owned).collect()) Ok((
frontmatter,
events.into_iter().map(event_to_owned).collect(),
))
} }
// Generate markdown elements for a file that is embedded within another note. // Generate markdown elements for a file that is embedded within another note.
@ -533,7 +518,11 @@ impl<'a> Exporter<'a> {
// - If the file being embedded is a note, it's content is included at the point of embed. // - If the file being embedded is a note, it's content is included at the point of embed.
// - If the file is an image, an image tag is generated. // - If the file is an image, an image tag is generated.
// - For other types of file, a regular link is created instead. // - For other types of file, a regular link is created instead.
fn embed_file<'b>(&self, link_text: &'a str, context: &'a Context) -> Result<MarkdownTree<'b>> { fn embed_file<'b>(
&self,
link_text: &'a str,
context: &'a Context,
) -> Result<MarkdownEvents<'b>> {
let note_ref = ObsidianNoteReference::from_str(link_text); let note_ref = ObsidianNoteReference::from_str(link_text);
let path = match note_ref.file { let path = match note_ref.file {
@ -561,7 +550,7 @@ impl<'a> Exporter<'a> {
let child_context = Context::from_parent(context, path); let child_context = Context::from_parent(context, path);
let no_ext = OsString::new(); let no_ext = OsString::new();
if !self.process_embeds_recursively && context.file_tree.contains(path) { if !self.process_embeds_recursively && context.file_tree().contains(path) {
return Ok([ return Ok([
vec![Event::Text(CowStr::Borrowed(""))], vec![Event::Text(CowStr::Borrowed(""))],
self.make_link_to_file(note_ref, &child_context), self.make_link_to_file(note_ref, &child_context),
@ -569,13 +558,13 @@ impl<'a> Exporter<'a> {
.concat()); .concat());
} }
let tree = match path.extension().unwrap_or(&no_ext).to_str() { let events = match path.extension().unwrap_or(&no_ext).to_str() {
Some("md") => { Some("md") => {
let mut tree = self.parse_obsidian_note(&path, &child_context)?; let (_frontmatter, mut events) = self.parse_obsidian_note(&path, &child_context)?;
if let Some(section) = note_ref.section { if let Some(section) = note_ref.section {
tree = reduce_to_section(tree, section); events = reduce_to_section(events, section);
} }
tree events
} }
Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") => { Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") => {
self.make_link_to_file(note_ref, &child_context) self.make_link_to_file(note_ref, &child_context)
@ -605,14 +594,14 @@ impl<'a> Exporter<'a> {
} }
_ => self.make_link_to_file(note_ref, &child_context), _ => self.make_link_to_file(note_ref, &child_context),
}; };
Ok(tree) Ok(events)
} }
fn make_link_to_file<'b, 'c>( fn make_link_to_file<'b, 'c>(
&self, &self,
reference: ObsidianNoteReference<'b>, reference: ObsidianNoteReference<'b>,
context: &Context, context: &Context,
) -> MarkdownTree<'c> { ) -> MarkdownEvents<'c> {
let target_file = reference let target_file = reference
.file .file
.map(|file| lookup_filename_in_vault(file, &self.vault_contents.as_ref().unwrap())) .map(|file| lookup_filename_in_vault(file, &self.vault_contents.as_ref().unwrap()))
@ -687,7 +676,7 @@ fn lookup_filename_in_vault<'a>(
}) })
} }
fn render_mdtree_to_mdtext(markdown: MarkdownTree) -> String { fn render_mdevents_to_mdtext(markdown: MarkdownEvents) -> String {
let mut buffer = String::new(); let mut buffer = String::new();
cmark_with_options( cmark_with_options(
markdown.iter(), markdown.iter(),
@ -736,25 +725,25 @@ fn is_markdown_file(file: &Path) -> bool {
ext == "md" ext == "md"
} }
/// Reduce a given `MarkdownTree` to just those elements which are children of the given section /// Reduce a given `MarkdownEvents` to just those elements which are children of the given section
/// (heading name). /// (heading name).
fn reduce_to_section<'a, 'b>(tree: MarkdownTree<'a>, section: &'b str) -> MarkdownTree<'a> { fn reduce_to_section<'a, 'b>(events: MarkdownEvents<'a>, section: &'b str) -> MarkdownEvents<'a> {
let mut new_tree = Vec::with_capacity(tree.len()); let mut filtered_events = Vec::with_capacity(events.len());
let mut target_section_encountered = false; let mut target_section_encountered = false;
let mut currently_in_target_section = false; let mut currently_in_target_section = false;
let mut section_level = 0; let mut section_level = 0;
let mut last_level = 0; let mut last_level = 0;
let mut last_tag_was_heading = false; let mut last_tag_was_heading = false;
for event in tree.into_iter() { for event in events.into_iter() {
new_tree.push(event.clone()); filtered_events.push(event.clone());
match event { match event {
Event::Start(Tag::Heading(level)) => { Event::Start(Tag::Heading(level)) => {
last_tag_was_heading = true; last_tag_was_heading = true;
last_level = level; last_level = level;
if currently_in_target_section && level <= section_level { if currently_in_target_section && level <= section_level {
currently_in_target_section = false; currently_in_target_section = false;
new_tree.pop(); filtered_events.pop();
} }
} }
Event::Text(cowstr) => { Event::Text(cowstr) => {
@ -769,20 +758,20 @@ fn reduce_to_section<'a, 'b>(tree: MarkdownTree<'a>, section: &'b str) -> Markdo
currently_in_target_section = true; currently_in_target_section = true;
section_level = last_level; section_level = last_level;
let current_event = new_tree.pop().unwrap(); let current_event = filtered_events.pop().unwrap();
let heading_start_event = new_tree.pop().unwrap(); let heading_start_event = filtered_events.pop().unwrap();
new_tree.clear(); filtered_events.clear();
new_tree.push(heading_start_event); filtered_events.push(heading_start_event);
new_tree.push(current_event); filtered_events.push(current_event);
} }
} }
_ => {} _ => {}
} }
if target_section_encountered && !currently_in_target_section { if target_section_encountered && !currently_in_target_section {
return new_tree; return filtered_events;
} }
} }
new_tree filtered_events
} }
fn event_to_owned<'a>(event: Event) -> Event<'a> { fn event_to_owned<'a>(event: Event) -> Event<'a> {
@ -839,101 +828,3 @@ fn codeblock_kind_to_owned<'a>(codeblock_kind: CodeBlockKind) -> CodeBlockKind<'
CodeBlockKind::Fenced(cowstr) => CodeBlockKind::Fenced(CowStr::from(cowstr.into_string())), CodeBlockKind::Fenced(cowstr) => CodeBlockKind::Fenced(CowStr::from(cowstr.into_string())),
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a reference from its `(file, section, label)` parts to keep the tables compact.
    fn reference<'a>(
        file: Option<&'a str>,
        section: Option<&'a str>,
        label: Option<&'a str>,
    ) -> ObsidianNoteReference<'a> {
        ObsidianNoteReference {
            file,
            label,
            section,
        }
    }

    /// Each input string must parse into the reference listed next to it.
    #[test]
    fn parse_note_refs_from_strings() {
        let cases = [
            ("Just a note", reference(Some("Just a note"), None, None)),
            ("A note?", reference(Some("A note?"), None, None)),
            ("Note#with heading", reference(Some("Note"), Some("with heading"), None)),
            ("Note#Heading|Label", reference(Some("Note"), Some("Heading"), Some("Label"))),
            ("#Heading|Label", reference(None, Some("Heading"), Some("Label"))),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(ObsidianNoteReference::from_str(input), *expected);
        }
    }

    /// Each reference must render as the string listed next to it.
    #[test]
    fn test_display_of_note_refs() {
        let cases = [
            ("Note", reference(Some("Note"), None, None)),
            ("Note > Heading", reference(Some("Note"), Some("Heading"), None)),
            ("Heading", reference(None, Some("Heading"), None)),
            ("Label", reference(Some("Note"), Some("Heading"), Some("Label"))),
            ("Label", reference(None, Some("Heading"), Some("Label"))),
        ];
        for (expected, note_ref) in cases.iter() {
            assert_eq!(*expected, note_ref.display());
        }
    }
}

204
src/references.rs Normal file
View File

@ -0,0 +1,204 @@
use regex::Regex;
use std::fmt;
// OBSIDIAN_NOTE_LINK_RE splits the text of a note reference into three optional named groups:
//
// - `file`: one or more characters other than `#` and `|`
// - `section`: the text following a `#`
// - `label`: the text following a `|`
//
// The pattern is anchored at both ends, so it has to account for the entire input.
lazy_static! {
static ref OBSIDIAN_NOTE_LINK_RE: Regex =
Regex::new(r"^(?P<file>[^#|]+)??(#(?P<section>.+?))??(\|(?P<label>.+?))??$").unwrap();
}
#[derive(Debug, Clone, PartialEq)]
/// ObsidianNoteReference represents the structure of a `[[note]]` or `![[embed]]` reference.
///
/// Every part is optional and, when present, is a string slice with lifetime `'a`.
pub struct ObsidianNoteReference<'a> {
/// The file (note name or partial path) being referenced.
/// This will be None in the case that the reference is to a section within the same document.
pub file: Option<&'a str>,
/// If specified, the specific section/heading being referenced.
pub section: Option<&'a str>,
/// If specified, the custom label/text for the reference.
pub label: Option<&'a str>,
}
/// RefParserState enumerates all the possible parsing states [RefParser] may enter.
///
/// A [RefParser] starts out in, and is reset to, `NoState`. Every other state is entered through
/// `RefParser::transition`, so the meaning of each transition is defined by the calling code.
///
/// The type is a plain fieldless enum, so it is cheap to copy, compare and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefParserState {
    NoState,
    ExpectSecondOpenBracket,
    ExpectRefText,
    ExpectRefTextOrCloseBracket,
    ExpectFinalCloseBracket,
    Resetting,
}
/// RefType indicates whether a note reference is a link (`[[note]]`) or embed (`![[embed]]`).
///
/// The type is a plain fieldless enum, so it is cheap to copy, compare and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefType {
    /// A `[[note]]` style reference.
    Link,
    /// A `![[embed]]` style reference.
    Embed,
}
/// RefParser holds state which is used to parse Obsidian WikiLinks (`[[note]]`, `![[embed]]`).
pub struct RefParser {
/// The state the parser is currently in.
pub state: RefParserState,
/// The kind of reference being parsed, or `None` when no kind has been recorded.
pub ref_type: Option<RefType>,
/// References sometimes come in through multiple events. One example of this is when notes
/// start with an underscore (_), presumably because this is also the literal which starts
/// italic and bold text.
///
/// ref_text concatenates the values from these partial events so that there's a fully-formed
/// string to work with by the time the final `]]` is encountered.
pub ref_text: String,
}
impl RefParser {
    /// Creates a parser in its initial configuration: state `NoState`, no reference type and an
    /// empty text buffer.
    pub fn new() -> Self {
        Self {
            state: RefParserState::NoState,
            ref_type: None,
            ref_text: String::new(),
        }
    }

    /// Moves the parser into `new_state`. `ref_type` and `ref_text` are left untouched.
    pub fn transition(&mut self, new_state: RefParserState) {
        self.state = new_state;
    }

    /// Returns the parser to the same field values a freshly constructed parser has.
    ///
    /// The text buffer is cleared in place rather than replaced with a new `String`.
    pub fn reset(&mut self) {
        self.state = RefParserState::NoState;
        self.ref_type = None;
        self.ref_text.clear();
    }
}

impl Default for RefParser {
    /// Equivalent to `RefParser::new()`.
    fn default() -> Self {
        Self::new()
    }
}
impl<'a> ObsidianNoteReference<'a> {
    /// Splits reference text such as `Note#Heading|Label` into its file, section and label
    /// parts, each of which is `None` when absent from `text`.
    ///
    /// # Panics
    ///
    /// Panics when `text` is not matched by `OBSIDIAN_NOTE_LINK_RE`.
    pub fn from_str(text: &str) -> ObsidianNoteReference {
        let groups = OBSIDIAN_NOTE_LINK_RE
            .captures(text)
            .expect("note link regex didn't match - bad input?");

        ObsidianNoteReference {
            file: groups.name("file").map(|found| found.as_str()),
            label: groups.name("label").map(|found| found.as_str()),
            section: groups.name("section").map(|found| found.as_str()),
        }
    }

    /// Renders the reference through its `Display` implementation and returns the result as an
    /// owned `String`.
    pub fn display(&self) -> String {
        self.to_string()
    }
}
impl<'a> fmt::Display for ObsidianNoteReference<'a> {
    /// Writes the text used to present this reference.
    ///
    /// An explicit label always wins. Without one, the output is `file > section`, just the
    /// file, or just the section, depending on which parts are present.
    ///
    /// # Panics
    ///
    /// Panics when label, file and section are all `None`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match (self.label, self.file, self.section) {
            (Some(label), _, _) => write!(f, "{}", label),
            (None, Some(file), Some(section)) => write!(f, "{} > {}", file, section),
            (None, Some(file), None) => write!(f, "{}", file),
            (None, None, Some(section)) => write!(f, "{}", section),
            (None, None, None) => panic!("Reference exists without file or section!"),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a reference from its `(file, section, label)` parts to keep the tables compact.
    fn reference<'a>(
        file: Option<&'a str>,
        section: Option<&'a str>,
        label: Option<&'a str>,
    ) -> ObsidianNoteReference<'a> {
        ObsidianNoteReference {
            file,
            label,
            section,
        }
    }

    /// Each input string must parse into the reference listed next to it.
    #[test]
    fn parse_note_refs_from_strings() {
        let cases = [
            ("Just a note", reference(Some("Just a note"), None, None)),
            ("A note?", reference(Some("A note?"), None, None)),
            ("Note#with heading", reference(Some("Note"), Some("with heading"), None)),
            ("Note#Heading|Label", reference(Some("Note"), Some("Heading"), Some("Label"))),
            ("#Heading|Label", reference(None, Some("Heading"), Some("Label"))),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(ObsidianNoteReference::from_str(input), *expected);
        }
    }

    /// Each reference must render as the string listed next to it.
    #[test]
    fn test_display_of_note_refs() {
        let cases = [
            ("Note", reference(Some("Note"), None, None)),
            ("Note > Heading", reference(Some("Note"), Some("Heading"), None)),
            ("Heading", reference(None, Some("Heading"), None)),
            ("Label", reference(Some("Note"), Some("Heading"), Some("Label"))),
            ("Label", reference(None, Some("Heading"), Some("Label"))),
        ];
        for (expected, note_ref) in cases.iter() {
            assert_eq!(*expected, note_ref.display());
        }
    }
}

View File

@ -1,6 +1,10 @@
use obsidian_export::{ExportError, Exporter, FrontmatterStrategy}; use obsidian_export::{
Context, ExportError, Exporter, FrontmatterStrategy, MarkdownEvents, PostprocessorResult,
};
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
use std::fs::{create_dir, read_to_string, set_permissions, File, Permissions}; use pulldown_cmark::{CowStr, Event};
use serde_yaml::Value;
use std::fs::{create_dir, read_to_string, remove_file, set_permissions, File, Permissions};
use std::io::prelude::*; use std::io::prelude::*;
use std::path::PathBuf; use std::path::PathBuf;
use tempfile::TempDir; use tempfile::TempDir;
@ -350,3 +354,108 @@ fn test_same_filename_different_directories() {
let actual = read_to_string(tmp_dir.path().clone().join(PathBuf::from("Note.md"))).unwrap(); let actual = read_to_string(tmp_dir.path().clone().join(PathBuf::from("Note.md"))).unwrap();
assert_eq!(expected, actual); assert_eq!(expected, actual);
} }
/// Postprocessor which rewrites every occurrence of "foo" to "bar" in the text events of a note.
///
/// Only `Event::Text` is modified. All other events and the context are handed back unchanged,
/// together with `PostprocessorResult::Continue`.
fn foo_to_bar(
    ctx: Context,
    events: MarkdownEvents,
) -> (Context, MarkdownEvents, PostprocessorResult) {
    let mut rewritten = Vec::with_capacity(events.len());
    for event in events {
        let replacement = match event {
            Event::Text(text) => Event::Text(CowStr::from(text.replace("foo", "bar"))),
            untouched => untouched,
        };
        rewritten.push(replacement);
    }
    (ctx, rewritten, PostprocessorResult::Continue)
}
/// Postprocessor which inserts the entry `bar: baz` into the frontmatter of the note's context.
///
/// The markdown events are handed back unchanged, together with `PostprocessorResult::Continue`.
fn append_frontmatter(
    mut ctx: Context,
    events: MarkdownEvents,
) -> (Context, MarkdownEvents, PostprocessorResult) {
    let key = Value::String(String::from("bar"));
    let value = Value::String(String::from("baz"));
    ctx.frontmatter.insert(key, value);
    (ctx, events, PostprocessorResult::Continue)
}
// Runs an export with both test postprocessors registered and compares the exported note with the
// expected fixture: `append_frontmatter` must have extended the frontmatter, and `foo_to_bar` must
// have replaced instances of "foo" with "bar" (only in the note body).
#[test]
fn test_postprocessors() {
    let out_dir = TempDir::new().expect("failed to make tempdir");
    let vault = PathBuf::from("tests/testdata/input/postprocessors");
    let mut exporter = Exporter::new(vault, out_dir.path().to_path_buf());

    // Registration order is kept as-is: body rewrite first, frontmatter second.
    exporter.add_postprocessor(&foo_to_bar);
    exporter.add_postprocessor(&append_frontmatter);
    exporter.run().unwrap();

    let expected = read_to_string("tests/testdata/expected/postprocessors/Note.md").unwrap();
    let exported_note = out_dir.path().join("Note.md");
    let actual = read_to_string(exported_note).unwrap();
    assert_eq!(expected, actual);
}
// The first postprocessor returns `StopHere`; the second one panics if it is ever invoked, so a
// successful run shows that it was not called.
#[test]
fn test_postprocessor_stophere() {
    let out_dir = TempDir::new().expect("failed to make tempdir");
    let vault = PathBuf::from("tests/testdata/input/postprocessors");
    let mut exporter = Exporter::new(vault, out_dir.path().to_path_buf());

    exporter.add_postprocessor(&|context, events| {
        (context, events, PostprocessorResult::StopHere)
    });
    exporter.add_postprocessor(&|_, _| panic!("should not be called due to above processor"));

    exporter.run().unwrap();
}
// Exports once without postprocessors to confirm the note is normally written, then exports again
// with a postprocessor returning `StopAndSkipNote` and checks that the note is absent.
#[test]
fn test_postprocessor_stop_and_skip() {
    let out_dir = TempDir::new().expect("failed to make tempdir");
    let exported_note = out_dir.path().join("Note.md");
    let vault = PathBuf::from("tests/testdata/input/postprocessors");
    let mut exporter = Exporter::new(vault, out_dir.path().to_path_buf());

    // Baseline run: the note must exist, and is then removed so the second run starts clean.
    exporter.run().unwrap();
    assert!(exported_note.exists());
    remove_file(&exported_note).unwrap();

    exporter.add_postprocessor(&|context, events| {
        (context, events, PostprocessorResult::StopAndSkipNote)
    });
    exporter.run().unwrap();

    assert!(!exported_note.exists());
}
// Exports once without postprocessors to confirm the note lands at its default location, then
// exports again with a postprocessor that renames the destination file and checks that only the
// renamed file exists.
#[test]
fn test_postprocessor_change_destination() {
    let out_dir = TempDir::new().expect("failed to make tempdir");
    let default_location = out_dir.path().join("Note.md");
    let vault = PathBuf::from("tests/testdata/input/postprocessors");
    let mut exporter = Exporter::new(vault, out_dir.path().to_path_buf());

    // Baseline run: the note must exist, and is then removed so the second run starts clean.
    exporter.run().unwrap();
    assert!(default_location.exists());
    remove_file(&default_location).unwrap();

    exporter.add_postprocessor(&|mut context, events| {
        context.destination.set_file_name("MovedNote.md");
        (context, events, PostprocessorResult::Continue)
    });
    exporter.run().unwrap();

    let moved_location = out_dir.path().join("MovedNote.md");
    assert!(!default_location.exists());
    assert!(moved_location.exists());
}

View File

@ -0,0 +1,8 @@
---
foo: bar
bar: baz
---
# Title
Sentence containing bar.

View File

@ -0,0 +1,7 @@
---
foo: bar
---
# Title
Sentence containing foo.