diff --git a/CHANGES.md b/CHANGES.md index 7f5dbbddbe..92b7082dc7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,8 @@ + Print odoc code block delimiters on their own line (#1980, @gpetiot) + + Optimize line/region re-indentation feature in emacs (#1609, @gpetiot) + #### New features #### RPC diff --git a/lib/Cmt_lexer.ml b/lib/Cmt_lexer.ml new file mode 100644 index 0000000000..91eabc1d83 --- /dev/null +++ b/lib/Cmt_lexer.ml @@ -0,0 +1,75 @@ +(**************************************************************************) +(* *) +(* OCamlFormat *) +(* *) +(* Copyright (c) Facebook, Inc. and its affiliates. *) +(* *) +(* This source code is licensed under the MIT license found in *) +(* the LICENSE file in the root directory of this source tree. *) +(* *) +(**************************************************************************) + +type token = Cmt of string Location.loc | S of string Location.loc + +open Location + +let inc_pos (pos : Lexing.position) = {pos with pos_cnum= pos.pos_cnum + 1} + +let nl_pos (pos : Lexing.position) = + { pos with + pos_lnum= pos.pos_lnum + 1 + ; pos_bol= pos.pos_cnum + 1 + ; pos_cnum= pos.pos_cnum + 1 } + +let update_pos pos input ~start_i ~end_i = + let rec aux pos i = + if i < end_i then + match String.unsafe_get input i with + | '\n' -> aux (nl_pos pos) (i + 1) + | _ -> aux (inc_pos pos) (i + 1) + else pos + in + (aux pos start_i, String.sub ~pos:start_i ~len:(end_i - start_i) input) + +let loc_ghost = false + +let lex_comments input = + let pos_fname = !Location.input_name in + let rec aux acc (pos : Lexing.position) = + if pos.pos_cnum >= String.length input then acc + else + match String.substr_index ~pos:pos.pos_cnum ~pattern:"(*" input with + | Some opn -> ( + let acc, pos = + if opn = pos.pos_cnum then (acc, pos) + else + let pos', s = + update_pos pos input ~start_i:pos.pos_cnum ~end_i:opn + in + let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in + (S (Location.mkloc s loc) :: acc, pos') + in + match String.substr_index ~pos:opn ~pattern:"*)" input with + | Some cls -> + let pos', cmt = + update_pos pos input ~start_i:opn ~end_i:(cls + 2) + in + let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in + let acc = Cmt (Location.mkloc cmt loc) :: acc in + aux acc pos' + | None -> + let pos', cmt = + update_pos pos input ~start_i:opn + ~end_i:(String.length input) + in + let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in + Cmt (Location.mkloc cmt loc) :: acc ) + | None -> + let pos', s = + update_pos pos input ~start_i:pos.pos_cnum + ~end_i:(String.length input) + in + let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in + S (Location.mkloc s loc) :: acc + in + List.rev (aux [] {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0}) diff --git a/lib/Cmt_lexer.mli b/lib/Cmt_lexer.mli new file mode 100644 index 0000000000..9920919a99 --- /dev/null +++ b/lib/Cmt_lexer.mli @@ -0,0 +1,16 @@ +(**************************************************************************) +(* *) +(* OCamlFormat *) +(* *) +(* Copyright (c) Facebook, Inc. and its affiliates. *) +(* *) +(* This source code is licensed under the MIT license found in *) +(* the LICENSE file in the root directory of this source tree. *) +(* *) +(**************************************************************************) + +type token = Cmt of string Location.loc | S of string Location.loc + +val lex_comments : string -> token list +(** [lex_comments x] splits [x] into a sequence of comments and non-comments + strings. *) diff --git a/lib/Migrate_ast.ml b/lib/Migrate_ast.ml index 16ed8ffa37..3ca8735448 100644 --- a/lib/Migrate_ast.ml +++ b/lib/Migrate_ast.ml @@ -32,6 +32,11 @@ module Position = struct include (val Comparator.make ~compare ~sexp_of_t) let distance p1 p2 = p2.pos_cnum - p1.pos_cnum + + let pp fs {pos_fname; pos_lnum; pos_bol; pos_cnum} = + Format.fprintf fs + "@[<2>{ pos_fname= %S;@;pos_lnum= %i;@;pos_bol= %i;@;pos_cnum= %i }@]" + pos_fname pos_lnum pos_bol pos_cnum end module Location = struct @@ -94,6 +99,14 @@ module Location = struct let min a b = if width a < width b then a else b in List.reduce_exn (loc :: stack) ~f:min + let pp fs {loc_ghost; loc_start; loc_end} = + Format.fprintf fs + "@[<2>{ loc_ghost= %b;@;loc_start= %a;@;loc_end= %a }@]" loc_ghost + Position.pp loc_start Position.pp loc_end + + let pp_loc f fs {txt; loc} = + Format.fprintf fs "@[<2>{ txt= %a;@;loc= %a }@]" f txt pp loc + let of_lexbuf (lexbuf : Lexing.lexbuf) = { loc_start= lexbuf.lex_start_p ; loc_end= lexbuf.lex_curr_p diff --git a/lib/Migrate_ast.mli b/lib/Migrate_ast.mli index e0a2bcd13b..d3abf55e14 100644 --- a/lib/Migrate_ast.mli +++ b/lib/Migrate_ast.mli @@ -64,6 +64,9 @@ module Location : sig val is_single_line : t -> int -> bool + val pp_loc : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a loc -> unit + val of_lexbuf : Lexing.lexbuf -> t val print : Format.formatter -> t -> unit diff --git a/lib/Slicer.ml b/lib/Slicer.ml new file mode 100644 index 0000000000..2ad0b2ec2a --- /dev/null +++ b/lib/Slicer.ml @@ -0,0 +1,222 @@ +(**************************************************************************) +(* *) +(* OCamlFormat *) +(* *) +(* Copyright (c) Facebook, Inc. and its affiliates. *) +(* *) +(* This source code is licensed under the MIT license found in *) +(* the LICENSE file in the root directory of this source tree. *) +(* *) +(**************************************************************************) + +let find_semisemi_start ?pos txt = + String.substr_index_all ~may_overlap:false ~pattern:";;" txt + |> fun l -> + ( match pos with + | Some pos -> List.filter l ~f:(fun x -> x <= pos - 2) + | None -> l ) + |> List.last + +let find_semisemi_end ?pos txt = + String.substr_index_all ~may_overlap:false ~pattern:";;" txt + |> fun l -> + ( match pos with + | Some pos -> List.filter l ~f:(fun x -> x > pos) + | None -> l ) + |> List.hd + +let find_nl_start ?pos txt = + let followed_by_space x = + match String.get txt (x + 2) with + | exception _ -> true + | ' ' -> false + | _ -> true + in + String.substr_index_all ~may_overlap:false ~pattern:"\n\n" txt + |> fun l -> + ( match pos with + | Some pos -> + List.filter l ~f:(fun x -> x <= pos - 2 && followed_by_space x) + | None -> List.filter l ~f:followed_by_space ) + |> List.last + +let find_nl_end ?pos txt = + let followed_by_space x = + match String.get txt (x + 2) with + | exception _ -> true + | ' ' -> false + | _ -> true + in + String.substr_index_all ~may_overlap:false ~pattern:"\n\n" txt + |> fun l -> + ( match pos with + | Some pos -> List.filter l ~f:(fun x -> x > pos && followed_by_space x) + | None -> List.filter l ~f:followed_by_space ) + |> List.hd + +let sub_start i (loc : Location.t) = + String.sub ~pos:(i + 2) + ~len:(loc.loc_end.pos_cnum - loc.loc_start.pos_cnum - i - 2) + +let sub_end i = String.sub ~pos:0 ~len:(i - 1) + +let find_start ?pos (lexed : Cmt_lexer.token array) ~start_i + ~split_on_semisemi = + let rec aux ?pos i = + if i < 0 then + match Array.unsafe_get lexed 0 with + | Cmt {txt; _} -> (0, txt, 0, 0) + | S {txt; _} -> (0, txt, 0, 0) + else + let lex = Array.unsafe_get lexed i in + match lex with + | Cmt _ -> aux (i - 1) + | S {txt; loc} -> ( + let ldiff max_cnum = + let rec aux lnum i = + if i >= max_cnum then lnum + else + match String.unsafe_get txt i with + | '\n' -> aux (lnum + 1) (i + 1) + | _ -> aux lnum (i + 1) + in + aux loc.loc_start.pos_lnum 0 + in + if split_on_semisemi then + match find_semisemi_start ?pos txt with + | Some x -> ( + match find_nl_start ?pos txt with + | Some y -> + let x = max x y in + let cnum = x + 2 in + let ldiff = ldiff cnum in + (i, sub_start x loc txt, cnum, ldiff) + | None -> + let cnum = x + 2 in + let ldiff = ldiff cnum in + (i, sub_start x loc txt, cnum, ldiff) ) + | None -> ( + match find_nl_start ?pos txt with + | Some x -> + let cnum = x + 2 in + let ldiff = ldiff cnum in + (i, sub_start x loc txt, cnum, ldiff) + | None -> aux (i - 1) ) + else + match find_nl_start ?pos txt with + | Some x -> + let cnum = x + 2 in + let ldiff = ldiff cnum in + (i, sub_start x loc txt, cnum, ldiff) + | None -> aux (i - 1) ) + in + aux ?pos start_i + +let find_end ?pos (lexed : Cmt_lexer.token array) ~end_i ~split_on_semisemi = + let max_i = Array.length lexed - 1 in + let rec aux ?pos i = + if i > max_i then + match Array.unsafe_get lexed max_i with + | Cmt {txt; _} -> (max_i, txt, String.length txt - 1) + | S {txt; _} -> (max_i, txt, String.length txt - 1) + else + let lex = Array.unsafe_get lexed i in + match lex with + | Cmt _ -> aux (i + 1) + | S {txt; loc= _} -> ( + if split_on_semisemi then + match find_semisemi_end ?pos txt with + | Some x -> ( + match find_nl_end ?pos txt with + | Some y -> + let x = min x y in + (i, sub_end x txt, x - 1) + | None -> (i, sub_end x txt, x - 1) ) + | None -> ( + match find_nl_end ?pos txt with + | Some x -> (i, sub_end x txt, x - 1) + | None -> aux (i + 1) ) + else + match find_nl_end ?pos txt with + | Some x -> (i, sub_end x txt, x - 1) + | None -> aux (i + 1) ) + in + aux ?pos end_i + +let split ~range:((low, high) as range) ~split_on_semisemi = function + | [] -> ("", range) + | lexed -> + let lexed : Cmt_lexer.token array = Array.of_list lexed in + let high = + match Array.unsafe_get lexed (Array.length lexed - 1) with + | Cmt {loc; _} | S {loc; _} -> + if high - 1 = loc.loc_end.pos_lnum + 1 then high - 1 else high + in + let range = (low, high) in + let start_i, start_lex = + Array.findi_exn lexed ~f:(fun _ -> function + | Cmt {loc; _} | S {loc; _} -> + loc.loc_start.pos_lnum <= low - 1 + && low - 1 <= loc.loc_end.pos_lnum ) + in + let end_i, end_lex = + Array.findi_exn lexed ~f:(fun _ -> function + | Cmt {loc; _} | S {loc; _} -> + loc.loc_start.pos_lnum <= high - 1 + && high - 1 <= loc.loc_end.pos_lnum ) + in + let start_i, start_lex, start_pos, ldiff = + match start_lex with + | S {txt; loc} -> + let rec aux ~lnum i = + if lnum = low - 1 then i + else + match String.unsafe_get txt i with + | '\n' -> aux ~lnum:(lnum + 1) (i + 1) + | _ -> aux ~lnum (i + 1) + in + let pos = aux 0 ~lnum:loc.loc_start.pos_lnum in + find_start ~pos ~start_i ~split_on_semisemi lexed + | Cmt _ -> find_start ~start_i ~split_on_semisemi lexed + in + let end_i, end_lex, end_pos = + match end_lex with + | S {txt; loc} -> + let rec aux ~lnum i = + if lnum = high - 1 then i + else + match String.unsafe_get txt i with + | '\n' -> aux ~lnum:(lnum + 1) (i + 1) + | _ -> aux ~lnum (i + 1) + in + let pos = aux 0 ~lnum:loc.loc_start.pos_lnum in + find_end ~pos ~end_i ~split_on_semisemi lexed + | Cmt _ -> find_end ~end_i ~split_on_semisemi lexed + in + if start_i = end_i then + match Array.unsafe_get lexed start_i with + | Cmt {txt; _} -> (txt, range) + | S {txt; _} -> + ( String.sub txt ~pos:start_pos ~len:(end_pos - start_pos + 1) + , (low - ldiff, high - ldiff) ) + else + let rec aux acc i = + if i = end_i then acc ^ end_lex + else + match Array.unsafe_get lexed i with + | Cmt {txt; _} -> aux (acc ^ txt) (i + 1) + | S {txt; _} -> aux (acc ^ txt) (i + 1) + in + (aux start_lex (start_i + 1), (low - ldiff, high - ldiff)) + +let fragment (type a) (fg : a Extended_ast.t) ~range input = + let split_on_semisemi = + match fg with + | Structure | Use_file -> true + | Signature -> false + | Core_type -> failwith "Slicer.fragment not implemented for Core_type" + | Module_type -> + failwith "Slicer.fragment not implemented for Module_type" + | Expression -> failwith "Slicer.fragment not implemented for Expression" + in + Cmt_lexer.lex_comments input |> split ~range ~split_on_semisemi diff --git a/lib/Slicer.mli b/lib/Slicer.mli new file mode 100644 index 0000000000..311c3d5723 --- /dev/null +++ b/lib/Slicer.mli @@ -0,0 +1,23 @@ +(**************************************************************************) +(* *) +(* OCamlFormat *) +(* *) +(* Copyright (c) Facebook, Inc. and its affiliates. *) +(* *) +(* This source code is licensed under the MIT license found in *) +(* the LICENSE file in the root directory of this source tree. *) +(* *) +(**************************************************************************) + +val split : + range:int * int + -> split_on_semisemi:bool + -> Cmt_lexer.token list + -> string * (int * int) +(** Exposed for tests. *) + +val fragment : + 'a Extended_ast.t -> range:int * int -> string -> string * (int * int) +(** [fragment fg ~range x] slices [x] into a smaller string, filtered + according to the range of lines [range], and containing the relevant + context so we can infer the indentation to apply to this range of lines. *) diff --git a/lib/Translation_unit.ml b/lib/Translation_unit.ml index d13fc4a751..12b3c3dab8 100644 --- a/lib/Translation_unit.ml +++ b/lib/Translation_unit.ml @@ -501,8 +501,8 @@ let check_range nlines (low, high) = else Error (Error.User_error (Format.sprintf "Invalid range %i-%i" low high)) -let numeric (type a b) (fg : a list Extended_ast.t) - (std_fg : b list Std_ast.t) ~input_name ~source ~range conf opts = +let numeric (type a b) (fg : a Extended_ast.t) (std_fg : b Std_ast.t) + ~input_name ~source ~range conf opts = let lines = String.split_lines source in let nlines = List.length lines in check_range nlines range @@ -533,12 +533,36 @@ let numeric (type a b) (fg : a list Extended_ast.t) | Ok parsed -> Ok parsed | Error _ -> parse_result recover fg conf ~source:src ~input_name in - match parse_or_recover ~src:source with - | Ok parsed -> ( - match parse_result Std_ast.Parse.ast std_fg conf ~source ~input_name with - | Ok std_parsed -> indent_parsed parsed std_parsed ~src:source ~range - | Error _ -> fallback () ) - | Error _ -> fallback () + (* Slice the file if it is too long *) + if nlines > 100 then + let sliced_src, sliced_range = Slicer.fragment fg ~range source in + match parse_or_recover ~src:sliced_src with + | Ok parsed -> ( + match + parse_result Std_ast.Parse.ast std_fg conf ~source:sliced_src + ~input_name + with + | Ok std_parsed -> + indent_parsed parsed std_parsed ~src:sliced_src ~range:sliced_range + | Error _ -> fallback () ) + | Error _ -> ( + match parse_or_recover ~src:source with + | Ok parsed -> ( + match + parse_result Std_ast.Parse.ast std_fg conf ~source ~input_name + with + | Ok std_parsed -> indent_parsed parsed std_parsed ~src:source ~range + | Error _ -> fallback () ) + | Error _ -> fallback () ) + else + match parse_or_recover ~src:source with + | Ok parsed -> ( + match + parse_result Std_ast.Parse.ast std_fg conf ~source ~input_name + with + | Ok std_parsed -> indent_parsed parsed std_parsed ~src:source ~range + | Error _ -> fallback () ) + | Error _ -> fallback () let numeric = function | Syntax.Structure -> numeric Structure Structure diff --git a/test/unit/test_cmt_lexer.ml b/test/unit/test_cmt_lexer.ml new file mode 100644 index 0000000000..2f0e33e5d6 --- /dev/null +++ b/test/unit/test_cmt_lexer.ml @@ -0,0 +1,370 @@ +open Ocamlformat_lib.Cmt_lexer +open Ocamlformat_lib.Migrate_ast.Location + +let lex = + let pp fs = function + | Cmt x -> Fmt.pf fs "(Cmt %a)" (pp_loc Fmt.string) x + | S x -> Fmt.pf fs "(S %a)" (pp_loc Fmt.string) x + in + Alcotest.testable pp ( = ) + +let test_lex_comments = + let make_test name ~input ~expected = + let test_name = "lex_comments: " ^ name in + let test_fun () = + let actual = lex_comments input in + Alcotest.(check (list lex)) test_name expected actual + in + (test_name, `Quick, test_fun) + in + let loc_ghost = false in + let pos_fname = "_none_" in + [ make_test "empty" ~input:"" ~expected:[] + ; make_test "multi empty" ~input:"\n\n" + ~expected: + [ S + { txt= "\n\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 2 + ; pos_bol= 2 + ; pos_cnum= 2 } } } ] + ; make_test "cmt before" ~input:"(* fooooooooooooo *)\nbar\n" + ~expected: + [ Cmt + { txt= "(* fooooooooooooo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 20} + } } + ; S + { txt= "\nbar\n" + ; loc= + { loc_ghost + ; loc_start= + {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 20} + ; loc_end= {pos_fname; pos_lnum= 2; pos_bol= 25; pos_cnum= 25} + } } ] + ; make_test "cmt multi before" ~input:"(* foooooo\n fooooooooooooo *)\nbar" + ~expected: + [ Cmt + { txt= "(* foooooo\n fooooooooooooo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 11; pos_cnum= 29} + } } + ; S + { txt= "\nbar" + ; loc= + { loc_ghost + ; loc_start= + {pos_fname; pos_lnum= 1; pos_bol= 11; pos_cnum= 29} + ; loc_end= {pos_fname; pos_lnum= 2; pos_bol= 30; pos_cnum= 33} + } } ] + ; make_test "cmt after" ~input:"foo\n(* bar *)" + ~expected: + [ S + { txt= "foo\n" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 4} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 4} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 13} + } } ] + ; make_test "cmt multi after" ~input:"fooooo\n(* baaaaaar\n baaaaaar *)" + ~expected: + [ S + { txt= "fooooo\n" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 7; pos_cnum= 7} + } } + ; Cmt + { txt= "(* baaaaaar\n baaaaaar *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 1; pos_bol= 7; pos_cnum= 7} + ; loc_end= {pos_fname; pos_lnum= 2; pos_bol= 19; pos_cnum= 32} + } } ] + ; make_test "2 cmts" ~input:"(* foo *)(* bar *)" + ~expected: + [ Cmt + { txt= "(* foo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 18} + } } ] + ; make_test "2 cmts break" ~input:"(* foo *)\n(* bar *)" + ~expected: + [ Cmt + { txt= "(* foo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + } } + ; S + { txt= "\n" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 10} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= + {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 10} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 19} + } } ] + ; make_test "let after module" + ~input:{| +module X = struct + let x = [ + + let y = bar +end + +let f = +|} + ~expected: + [ S + { txt= + "\n\ + module X = struct\n\ + \ let x = [\n\n\ + \ let y = bar\n\ + end\n\n\ + let f =\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 8 + ; pos_bol= 59 + ; pos_cnum= 59 } } } ] + ; make_test "2 let 1l split" ~input:"let x = x\nlet y = y" + ~expected: + [ S + { txt= "let x = x\nlet y = y" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 10 + ; pos_cnum= 19 } } } ] + ; make_test "already formatted" + ~input: + {|let foooooo = + let baaaaar = + let woooooo = foooooo in + let xooooo = bar + foo in + woooooo + in + bar +|} + ~expected: + [ S + { txt= + "let foooooo =\n\ + \ let baaaaar =\n\ + \ let woooooo = foooooo in\n\ + \ let xooooo = bar + foo in\n\ + \ woooooo\n\ + \ in\n\ + \ bar\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 7 + ; pos_bol= 112 + ; pos_cnum= 112 } } } ] + ; make_test "ill-indent" + ~input: + {| let () = + ffff; + hhhhhh; + fff; + let (quot, _rem) = + let quot_rem n k = + let (d, m) = (n / k, n mod k) in + if d < 0 && m > 0 then (d+1, m-k) +else (d, m) + in + let quot n k = fst (quot_rem n k) in + let rem n k = snd (quot_rem n k) in + +quot, rem +|} + ~expected: + [ S + { txt= + {| let () = + ffff; + hhhhhh; + fff; + let (quot, _rem) = + let quot_rem n k = + let (d, m) = (n / k, n mod k) in + if d < 0 && m > 0 then (d+1, m-k) +else (d, m) + in + let quot n k = fst (quot_rem n k) in + let rem n k = snd (quot_rem n k) in + +quot, rem +|} + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 14 + ; pos_bol= 275 + ; pos_cnum= 275 } } } ] + ; make_test "comment header" + ~input: + {|(**************************************************************************) +(**************************************************************************) + +module Format = Format_ + +(** Format OCaml Ast *) + +open Migrate_ast +|} + ~expected: + [ Cmt + { txt= + "(**************************************************************************)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 76 } } } + ; S + { txt= "\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 76 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 77 } } } + ; Cmt + { txt= + "(**************************************************************************)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 77 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 153 } } } + ; S + { txt= "\n\nmodule Format = Format_\n\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 153 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 180 } } } + ; Cmt + { txt= "(** Format OCaml Ast *)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 180 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 203 } } } + ; S + { txt= "\n\nopen Migrate_ast\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 203 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 8 + ; pos_bol= 222 + ; pos_cnum= 222 } } } ] ] + +let tests = test_lex_comments diff --git a/test/unit/test_cmt_lexer.mli b/test/unit/test_cmt_lexer.mli new file mode 100644 index 0000000000..d38ba9a90a --- /dev/null +++ b/test/unit/test_cmt_lexer.mli @@ -0,0 +1 @@ +val tests : unit Alcotest.test_case list diff --git a/test/unit/test_slicer.ml b/test/unit/test_slicer.ml new file mode 100644 index 0000000000..561eb61f6b --- /dev/null +++ b/test/unit/test_slicer.ml @@ -0,0 +1,449 @@ +open Ocamlformat_lib.Slicer +open Ocamlformat_lib.Migrate_ast.Location + +let test_split = + let make_test name ~range ~input ~split_on_semisemi ~expected ~new_range = + let test_name = "split: " ^ name in + let test_fun () = + let actual = split input ~range ~split_on_semisemi in + Alcotest.(check (pair string (pair int int))) + test_name (expected, new_range) actual + in + (test_name, `Quick, test_fun) + in + let loc_ghost = false in + let pos_fname = "_none_" in + [ make_test "empty" ~range:(1, 1) ~input:[] ~expected:"" ~new_range:(1, 1) + ~split_on_semisemi:true + ; make_test "multi empty" ~range:(1, 3) ~split_on_semisemi:true + ~input: + [ S + { txt= "\n\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 2 + ; pos_bol= 2 + ; pos_cnum= 2 } } } ] + ~expected:"\n\n" ~new_range:(1, 3) + ; make_test "cmt before" ~range:(1, 3) ~split_on_semisemi:true + ~input: + [ Cmt + { txt= "(* fooooooooooooo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 20} + } } + ; S + { txt= "\nbar\n" + ; loc= + { loc_ghost + ; loc_start= + {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 20} + ; loc_end= {pos_fname; pos_lnum= 2; pos_bol= 25; pos_cnum= 25} + } } ] + ~expected:"(* fooooooooooooo *)\nbar\n" ~new_range:(1, 3) + ; make_test "cmt after" ~range:(1, 2) ~split_on_semisemi:true + ~input: + [ S + { txt= "foo\n" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 4} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 4} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 4; pos_cnum= 13} + } } ] + ~expected:"foo\n(* bar *)" ~new_range:(1, 2) + ; make_test "2 cmts" ~range:(1, 1) ~split_on_semisemi:true + ~input: + [ Cmt + { txt= "(* foo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 18} + } } ] + ~expected:"(* foo *)(* bar *)" ~new_range:(1, 1) + ; make_test "2 cmts break" ~range:(1, 2) ~split_on_semisemi:true + ~input: + [ Cmt + { txt= "(* foo *)" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0} + ; loc_end= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + } } + ; S + { txt= "\n" + ; loc= + { loc_ghost + ; loc_start= {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 9} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 10} + } } + ; Cmt + { txt= "(* bar *)" + ; loc= + { loc_ghost + ; loc_start= + {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 10} + ; loc_end= {pos_fname; pos_lnum= 1; pos_bol= 10; pos_cnum= 19} + } } ] + ~expected:"(* foo *)\n(* bar *)" ~new_range:(1, 2) + ; make_test "let after module" ~range:(8, 8) ~split_on_semisemi:true + ~input: + [ S + { txt= + {| +module X = struct + let x = [ + + let y = bar +end + +let f = +|} + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 8 + ; pos_bol= 59 + ; pos_cnum= 59 } } } ] + ~expected:"let f =\n" ~new_range:(1, 1) + ; make_test "2 let 1l split" ~range:(1, 1) ~split_on_semisemi:true + ~input: + [ S + { txt= "let x = x\nlet y = y" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 10 + ; pos_cnum= 19 } } } ] + ~expected:"let x = x\nlet y = y" ~new_range:(1, 1) + ; make_test "already formatted" ~range:(1, 7) ~split_on_semisemi:true + ~input: + [ S + { txt= + {|let foooooo = + let baaaaar = + let woooooo = foooooo in + let xooooo = bar + foo in + woooooo + in + bar +|} + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 7 + ; pos_bol= 112 + ; pos_cnum= 112 } } } ] + ~expected: + {|let foooooo = + let baaaaar = + let woooooo = foooooo in + let xooooo = bar + foo in + woooooo + in + bar +|} + ~new_range:(1, 7) + ; make_test "ill-indent" ~range:(1, 14) ~split_on_semisemi:true + ~input: + [ S + { txt= + {| let () = + ffff; + hhhhhh; + fff; + let (quot, _rem) = + let quot_rem n k = + let (d, m) = (n / k, n mod k) in + if d < 0 && m > 0 then (d+1, m-k) +else (d, m) + in + let quot n k = fst (quot_rem n k) in + let rem n k = snd (quot_rem n k) in + +quot, rem +|} + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 14 + ; pos_bol= 275 + ; pos_cnum= 275 } } } ] + ~expected: + {| let () = + ffff; + hhhhhh; + fff; + let (quot, _rem) = + let quot_rem n k = + let (d, m) = (n / k, n mod k) in + if d < 0 && m > 0 then (d+1, m-k) +else (d, m) + in + let quot n k = fst (quot_rem n k) in + let rem n k = snd (quot_rem n k) in + +quot, rem +|} + ~new_range:(1, 14) + ; make_test "comment header" ~range:(4, 4) ~new_range:(1, 1) + ~split_on_semisemi:true + ~input: + [ Cmt + { txt= + "(**************************************************************************)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 0 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 76 } } } + ; S + { txt= "\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 0 + ; pos_bol= 0 + ; pos_cnum= 76 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 77 } } } + ; Cmt + { txt= + "(**************************************************************************)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 77 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 153 } } } + ; S + { txt= "\n\nmodule Format = Format_\n\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 1 + ; pos_bol= 77 + ; pos_cnum= 153 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 180 } } } + ; Cmt + { txt= "(** Format OCaml Ast *)" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 180 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 203 } } } + ; S + { txt= "\n\nopen Migrate_ast\n" + ; loc= + { loc_ghost= false + ; loc_start= + { pos_fname= "_none_" + ; pos_lnum= 5 + ; pos_bol= 180 + ; pos_cnum= 203 } + ; loc_end= + { pos_fname= "_none_" + ; pos_lnum= 8 + ; pos_bol= 222 + ; pos_cnum= 222 } } } ] + ~expected:"module Format = Format_" ] + +let test_use_file = + let make_test name ~range ~input ~expected ~new_range = + let test_name = "use_file: " ^ name in + let test_fun () = + let actual = fragment Use_file input ~range in + Alcotest.(check (pair string (pair int int))) + test_name (expected, new_range) actual + in + (test_name, `Quick, test_fun) + in + [ make_test "empty" ~range:(1, 1) ~input:"" ~expected:"" ~new_range:(1, 1) + ; make_test "multi empty" ~range:(1, 3) ~input:"\n\n" ~expected:"\n\n" + ~new_range:(1, 3) + ; make_test "invalid let" ~range:(1, 1) ~input:"let x" ~expected:"let x" + ~new_range:(1, 1) + ; make_test "valid let" ~range:(1, 1) ~input:"let x = 2" + ~expected:"let x = 2" ~new_range:(1, 1) + ; make_test "2 let 1l split (a)" ~range:(1, 2) + ~input:"let x = x\nlet y = y" ~expected:"let x = x\nlet y = y" + ~new_range:(1, 2) + ; make_test "2 let 1l split (b)" ~range:(1, 1) + ~input:"let x = x\nlet y = y" ~expected:"let x = x\nlet y = y" + ~new_range:(1, 1) + ; make_test "2 let 1l split (c)" ~range:(2, 2) + ~input:"let x = x\nlet y = y" ~expected:"let x = x\nlet y = y" + ~new_range:(2, 2) + ; make_test "2 let 2l split (a)" ~range:(1, 3) + ~input:"let x = x\n\nlet y = y" ~expected:"let x = x\n\nlet y = y" + ~new_range:(1, 3) + ; make_test "2 let 2l split (b)" ~range:(1, 2) + ~input:"let x = x\n\nlet y = y" ~expected:"let x = x\n\nlet y = y" + ~new_range:(1, 2) + ; make_test "2 let 2l split (c)" ~range:(2, 3) + ~input:"let x = x\n\nlet y = y" ~expected:"let x = x\n\nlet y = y" + ~new_range:(2, 3) + ; make_test "2 let 2l split (d)" ~range:(3, 3) + ~input:"let x = x\n\nlet y = y" ~expected:"let y = y" ~new_range:(1, 1) + ; make_test "2 let mix split" ~range:(1, 4) + ~input:"let x =\n\nx\nlet y = y" ~expected:"let x =\n\nx\nlet y = y" + ~new_range:(1, 4) + ; make_test "let after module" ~range:(8, 8) + ~input:{| +module X = struct + let x = [ + + let y = bar +end + +let f = +|} + ~expected:"let f =\n" ~new_range:(1, 1) + ; make_test "already formatted" ~range:(1, 7) + ~input: + {|let foooooo = + let baaaaar = + let woooooo = foooooo in + let xooooo = bar + foo in + woooooo + in + bar +|} + ~expected: + {|let foooooo = + let baaaaar = + let woooooo = foooooo in + let xooooo = bar + foo in + woooooo + in + bar +|} + ~new_range:(1, 7) + ; make_test "already formatted function" ~range:(9, 9) + ~input: + {|let x = x + +let fmt_expressions c width sub_exp exprs fmt_expr + (p : Params.elements_collection) = + match c.conf.break_collection_expressions with + | `Fit_or_vertical -> fmt_elements_collection p fmt_expr exprs + | `Wrap -> + let is_simple x = is_simple c.conf width (sub_exp x) in + let break x1 x2 = not (is_simple x1 && is_simple x2) in + let grps = List.group exprs ~break in + let fmt_grp ~first:first_grp ~last:last_grp exprs = + fmt_elements_collection ~first_sep:first_grp ~last_sep:last_grp p + fmt_expr exprs + in + list_fl grps fmt_grp + +let y = y|} + ~new_range:(7, 7) + ~expected: + {|let fmt_expressions c width sub_exp exprs fmt_expr + (p : Params.elements_collection) = + match c.conf.break_collection_expressions with + | `Fit_or_vertical -> fmt_elements_collection p fmt_expr exprs + | `Wrap -> + let is_simple x = is_simple c.conf width (sub_exp x) in + let break x1 x2 = not (is_simple x1 && is_simple x2) in + let grps = List.group exprs ~break in + let fmt_grp ~first:first_grp ~last:last_grp exprs = + fmt_elements_collection ~first_sep:first_grp ~last_sep:last_grp p + fmt_expr exprs + in + list_fl grps fmt_grp|} + ] + +let test_interface = + let make_test name ~range ~input ~expected ~new_range = + let test_name = "interface: " ^ name in + let test_fun () = + let actual = fragment Signature input ~range in + Alcotest.(check (pair string (pair int int))) + test_name (expected, new_range) actual + in + (test_name, `Quick, test_fun) + in + [ make_test "empty" ~range:(1, 1) ~input:"" ~expected:"" ~new_range:(1, 1) + ; make_test "multi empty" ~range:(1, 3) ~input:"\n\n" ~expected:"\n\n" + ~new_range:(1, 3) ] + +let tests = test_split @ test_use_file @ test_interface diff --git a/test/unit/test_slicer.mli b/test/unit/test_slicer.mli new file mode 100644 index 0000000000..d38ba9a90a --- /dev/null +++ b/test/unit/test_slicer.mli @@ -0,0 +1 @@ +val tests : unit Alcotest.test_case list diff --git a/test/unit/test_unit.ml b/test/unit/test_unit.ml index 34061617cf..d71df23a06 100644 --- a/test/unit/test_unit.ml +++ b/test/unit/test_unit.ml @@ -114,9 +114,11 @@ let tests = [ ("Location", Test_location.tests) ; ("non overlapping interval tree", Test_noit.tests) ; ("Ast", Test_ast.tests) + ; ("Cmt_lexer", Test_cmt_lexer.tests) ; ("Indent", Test_indent.tests) ; ("Literal_lexer", Test_literal_lexer.tests) ; ("Fmt", Test_fmt.tests) + ; ("Slicer", Test_slicer.tests) ; ("Translation_unit", Test_translation_unit.tests) ] let () = Alcotest.run "ocamlformat" tests