Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse single-quoted GHC output more reliably #301

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ghci/parse/ghc_message/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use position::PositionRange;
mod severity;
pub use severity::Severity;

mod single_quote;
mod single_quoted;

mod path_colon;
use path_colon::path_colon;
Expand Down
7 changes: 2 additions & 5 deletions src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::ghci::parse::lines::line_ending_or_eof;
use crate::ghci::parse::lines::rest_of_line;
use crate::ghci::parse::Severity;

use super::single_quote::single_quote;
use super::single_quoted::single_quoted;
use super::GhcDiagnostic;
use super::GhcMessage;

Expand All @@ -39,10 +39,7 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult<Vec<GhcMessag
let _ = opt("which ").parse_next(input)?;
let _ = opt("imports ").parse_next(input)?;
let _ = "module ".parse_next(input)?;
let _ = single_quote.parse_next(input)?;
let _name = module_name.parse_next(input)?;
let _ = single_quote.parse_next(input)?;
let _ = space1.parse_next(input)?;
let (_name, _) = single_quoted(module_name, space1).parse_next(input)?;
let _ = "(".parse_next(input)?;
let path = take_until(1.., ")").parse_next(input)?;
let _ = ")".parse_next(input)?;
Expand Down
39 changes: 0 additions & 39 deletions src/ghci/parse/ghc_message/single_quote.rs

This file was deleted.

175 changes: 175 additions & 0 deletions src/ghci/parse/ghc_message/single_quoted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
use winnow::combinator::alt;
use winnow::combinator::preceded;
use winnow::error::ParserError;
use winnow::stream::AsChar;
use winnow::stream::Stream;
use winnow::token::any;
use winnow::token::take_till;
use winnow::Parser;

use crate::ghci::parse::transform_till;

/// Parse a piece of GHC output that GHC renders inside single quotes.
///
/// `inner` is applied to the text between the quotes and must consume all of it; `end` must
/// match immediately after the closing quote. The outputs of both are returned as a tuple.
///
/// When Unicode is supported and `GHC_NO_UNICODE` is unset, GHC surrounds the text with
/// Unicode single quotes:
///
/// ```text
/// ‘puppy’
/// ```
///
/// Without Unicode, "GNU-style" quotes are used instead:
///
/// ```text
/// `puppy'
/// ```
///
/// There is one exception: with Unicode output disabled, a string that itself starts or ends
/// with an ASCII single quote (`'`) is printed with no quotes at all:
///
/// ```text
/// puppy -> `puppy'
/// puppy' -> puppy'
/// 'puppy -> 'puppy
/// 'puppy' -> 'puppy'
/// `puppy' -> `puppy'
/// ```
///
/// The first and last rows produce identical output, so that case cannot be disambiguated.
///
/// See: <https://gitlab.haskell.org/ghc/ghc/-/blob/077cb2e11fa81076e8c9c5f8dd3bdfa99c8aaf8d/compiler/GHC/Utils/Outputable.hs#L744-L756>
///
/// See: <https://gitlab.haskell.org/ghc/ghc/-/blob/077cb2e11fa81076e8c9c5f8dd3bdfa99c8aaf8d/compiler/GHC/Utils/Ppr.hs#L468>
pub fn single_quoted<'i, O1, O2, E>(
    mut inner: impl Parser<&'i str, O1, E>,
    mut end: impl Parser<&'i str, O2, E>,
) -> impl Parser<&'i str, (O1, O2), E>
where
    E: ParserError<&'i str>,
{
    move |input: &mut &'i str| {
        // Remember where we started so the unquoted case can re-read the first character.
        let before_opening = input.checkpoint();
        let opening = any.parse_next(input)?.as_char();

        // Unicode quotes: scan forward to a `’` that is directly followed by `end`.
        if opening == '‘' {
            return transform_till(
                alt((preceded('’', take_till(0.., '’')), take_till(1.., '’'))),
                inner.by_ref(),
                preceded('’', end.by_ref()),
            )
            .parse_next(input);
        }

        // GNU-style quotes. Output that begins with a backtick always ends with a single quote:
        // * If it is quoted normally, the closing quote is a single quote.
        // * If quoting was skipped, the text must start or end with a single quote; it starts
        //   with a backtick, so it has to be the end.
        if opening == '`' {
            return transform_till(
                alt((preceded('\'', take_till(0.., '\'')), take_till(1.., '\''))),
                inner.by_ref(),
                preceded('\'', end.by_ref()),
            )
            .parse_next(input);
        }

        // Any other first character means the quoting was skipped, so that character belongs
        // to the quoted text: rewind and include it.
        input.reset(before_opening);
        // Potentially this will have to consume the entire input before backtracking. Sad!
        transform_till(any, inner.by_ref(), end.by_ref()).parse_next(input)
    }
}

#[cfg(test)]
mod tests {
    use crate::ghci::parse::haskell_grammar::module_name;

    use super::*;

    use pretty_assertions::assert_eq;

    /// Checks every quoting style `single_quoted` understands, using a module name followed by
    /// a single space as the quoted text and terminator.
    #[test]
    fn test_parse_single_quoted() {
        // Each entry is `(input, expected module name)`.
        let accepted = [
            // Unicode.
            ("‘Puppy’ ", "Puppy"),
            ("‘Puppy'’ ", "Puppy'"),
            ("‘Puppy''’ ", "Puppy''"),
            // ASCII.
            ("`Puppy' ", "Puppy"),
            // Internal quotes.
            ("`Pupp'y' ", "Pupp'y"),
            ("`Pupp''y' ", "Pupp''y"),
            ("`Pupp'''y' ", "Pupp'''y"),
            ("`Pupp''''y' ", "Pupp''''y"),
            // Ends with a single quote, so the quoting is skipped.
            ("Puppy' ", "Puppy'"),
            ("Puppy'' ", "Puppy''"),
        ];
        for (input, expected) in accepted {
            assert_eq!(
                single_quoted(module_name, ' ').parse(input).unwrap(),
                (expected, ' ')
            );
        }

        // Starts with a single quote, so the quoting is skipped. The inner parser has to eat
        // the leading quote itself before the module name.
        let accepted_with_leading_quote = [("'Puppy ", "Puppy"), ("'Puppy' ", "Puppy'")];
        for (input, expected) in accepted_with_leading_quote {
            assert_eq!(
                single_quoted(preceded('\'', module_name), ' ')
                    .parse(input)
                    .unwrap(),
                (expected, ' ')
            );
        }

        // Negative cases.
        let rejected = [
            // No valid ending.
            "‘Puppy’x",
            // Modules can't start with numbers.
            "`0' ",
            "0 ",
            // Delimiters have to match.
            "‘Puppy' ",
            "`Puppy’ ",
        ];
        for input in rejected {
            assert!(single_quoted(module_name, ' ').parse(input).is_err());
        }
    }
}
3 changes: 3 additions & 0 deletions src/ghci/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod lines;
mod module_and_files;
mod show_paths;
mod show_targets;
mod transform_till;

use haskell_grammar::module_name;
use lines::rest_of_line;
Expand All @@ -24,3 +25,5 @@ pub use module_and_files::CompilingModule;
pub use show_paths::parse_show_paths;
pub use show_paths::ShowPaths;
pub use show_targets::parse_show_targets;
pub use transform_till::recognize_till;
pub use transform_till::transform_till;
81 changes: 81 additions & 0 deletions src/ghci/parse/transform_till.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use winnow::combinator::eof;
use winnow::combinator::terminated;
use winnow::error::ErrMode;
use winnow::error::ErrorKind;
use winnow::error::ParserError;
use winnow::stream::Offset;
use winnow::stream::Stream;
use winnow::stream::StreamIsPartial;
use winnow::Parser;

/// Call the `repeat` parser until the `end` parser produces a result.
///
/// Then, return the input consumed until the `end` parser was called, and the result of the `end`
/// parser.
///
/// At each position `end` is tried first. If it backtracks, the input is rewound and `repeat`
/// is applied once (its output is discarded) before `end` is tried again.
///
/// # Errors
///
/// * Any non-backtracking error from `end` is returned unchanged.
/// * Any error from `repeat` is returned with [`ErrorKind::Many`] appended.
/// * If `repeat` succeeds without consuming any input, an assertion error is reported instead
///   of looping forever; this mirrors the check performed by winnow's own repetition
///   combinators.
///
/// See: <https://github.com/winnow-rs/winnow/pull/541>
pub fn recognize_till<I, Discard, O, E>(
    mut repeat: impl Parser<I, Discard, E>,
    mut end: impl Parser<I, O, E>,
) -> impl Parser<I, (<I as Stream>::Slice, O), E>
where
    I: Stream,
    E: ParserError<I>,
{
    move |input: &mut I| {
        let start = input.checkpoint();

        loop {
            let before_end = input.checkpoint();
            // Used below to detect a `repeat` parser that makes no progress.
            let remaining_before = input.eof_offset();
            match end.parse_next(input) {
                Ok(end_parsed) => {
                    let after_end = input.checkpoint();

                    // Rewind to the start to slice out everything up to (but excluding) the
                    // text matched by `end`, then jump back to just after `end`.
                    let offset_to_before_end = before_end.offset_from(&start);
                    input.reset(start);
                    let input_until_end = input.next_slice(offset_to_before_end);
                    input.reset(after_end);

                    return Ok((input_until_end, end_parsed));
                }
                Err(ErrMode::Backtrack(_)) => {
                    input.reset(before_end);
                    match repeat.parse_next(input) {
                        Ok(_) => {
                            // Infinite loop check: if `repeat` consumed nothing, `end` would
                            // fail at the same position again, forever.
                            if input.eof_offset() == remaining_before {
                                return Err(ErrMode::assert(
                                    input,
                                    "`repeat` parsers must always consume",
                                ));
                            }
                        }
                        Err(e) => return Err(e.append(input, ErrorKind::Many)),
                    }
                }
                Err(e) => return Err(e),
            }
        }
    }
}

/// Like [`recognize_till`], but it also applies a `transform` parser to the recognized input.
///
/// The slice recognized before `end` is handed to `transform`, which must consume that slice
/// completely (it is followed by an `eof` check). The result is the pair of `transform`'s
/// output and `end`'s output.
///
/// # Errors
///
/// Errors from locating `end` are propagated as-is. Once `end` has been found, any failure of
/// `transform` on the recognized slice is reported as [`ErrMode::Cut`], even if `transform`
/// itself only backtracked.
///
/// # Panics
///
/// Panics if `transform` reports [`ErrMode::Incomplete`].
pub fn transform_till<I, O1, O2, Discard, E>(
    mut repeat: impl Parser<I, Discard, E>,
    mut transform: impl Parser<<I as Stream>::Slice, O1, E>,
    mut end: impl Parser<I, O2, E>,
) -> impl Parser<I, (O1, O2), E>
where
    I: Stream,
    E: ParserError<I>,
    E: ParserError<<I as Stream>::Slice>,
    <I as Stream>::Slice: Stream + StreamIsPartial,
{
    move |input: &mut I| {
        // First find where `end` matches and grab everything in front of it.
        let (mut recognized, end_output) =
            recognize_till(repeat.by_ref(), end.by_ref()).parse_next(input)?;

        // Then run `transform` over exactly that slice, requiring it to reach the slice's end.
        match terminated(transform.by_ref(), eof).parse_next(&mut recognized) {
            Ok(transformed) => Ok((transformed, end_output)),
            Err(ErrMode::Incomplete(_)) => {
                panic!("complete parsers should not report `ErrMode::Incomplete(_)`")
            }
            Err(ErrMode::Backtrack(inner) | ErrMode::Cut(inner)) => Err(ErrMode::Cut(inner)),
        }
    }
}