Skip to content

Commit

Permalink
Slicing optimization for line/region indentation feature for emacs
Browse files Browse the repository at this point in the history
  • Loading branch information
gpetiot committed Oct 28, 2021
1 parent 2d6fc3f commit 3449dba
Show file tree
Hide file tree
Showing 13 changed files with 1,240 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@

+ Apply option 'module-item-spacing' on mutually recursive type declarations for more consistency (#1854, @gpetiot)

+ Optimize line/region re-indentation feature in emacs (#1609, @gpetiot)

#### New features

+ Handle merlin typed holes (#1698, @gpetiot)
Expand Down
75 changes: 75 additions & 0 deletions lib/Cmt_lexer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
(**************************************************************************)
(* *)
(* OCamlFormat *)
(* *)
(* Copyright (c) Facebook, Inc. and its affiliates. *)
(* *)
(* This source code is licensed under the MIT license found in *)
(* the LICENSE file in the root directory of this source tree. *)
(* *)
(**************************************************************************)

type token = Cmt of string Location.loc | S of string Location.loc

open Location

let inc_pos (pos : Lexing.position) = {pos with pos_cnum= pos.pos_cnum + 1}

let nl_pos (pos : Lexing.position) =
{ pos with
pos_lnum= pos.pos_lnum + 1
; pos_bol= pos.pos_cnum + 1
; pos_cnum= pos.pos_cnum + 1 }

let update_pos pos input ~start_i ~end_i =
let rec aux pos i =
if i < end_i then
match String.unsafe_get input i with
| '\n' -> aux (nl_pos pos) (i + 1)
| _ -> aux (inc_pos pos) (i + 1)
else pos
in
(aux pos start_i, String.sub ~pos:start_i ~len:(end_i - start_i) input)

let loc_ghost = false

let lex_comments input =
let pos_fname = !Location.input_name in
let rec aux acc (pos : Lexing.position) =
if pos.pos_cnum >= String.length input then acc
else
match String.substr_index ~pos:pos.pos_cnum ~pattern:"(*" input with
| Some opn -> (
let acc, pos =
if opn = pos.pos_cnum then (acc, pos)
else
let pos', s =
update_pos pos input ~start_i:pos.pos_cnum ~end_i:opn
in
let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in
(S (Location.mkloc s loc) :: acc, pos')
in
match String.substr_index ~pos:opn ~pattern:"*)" input with
| Some cls ->
let pos', cmt =
update_pos pos input ~start_i:opn ~end_i:(cls + 2)
in
let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in
let acc = Cmt (Location.mkloc cmt loc) :: acc in
aux acc pos'
| None ->
let pos', cmt =
update_pos pos input ~start_i:opn
~end_i:(String.length input)
in
let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in
Cmt (Location.mkloc cmt loc) :: acc )
| None ->
let pos', s =
update_pos pos input ~start_i:pos.pos_cnum
~end_i:(String.length input)
in
let loc = {loc_ghost; loc_start= pos; loc_end= pos'} in
S (Location.mkloc s loc) :: acc
in
List.rev (aux [] {pos_fname; pos_lnum= 0; pos_bol= 0; pos_cnum= 0})
16 changes: 16 additions & 0 deletions lib/Cmt_lexer.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
(**************************************************************************)
(* *)
(* OCamlFormat *)
(* *)
(* Copyright (c) Facebook, Inc. and its affiliates. *)
(* *)
(* This source code is licensed under the MIT license found in *)
(* the LICENSE file in the root directory of this source tree. *)
(* *)
(**************************************************************************)

type token = Cmt of string Location.loc | S of string Location.loc

val lex_comments : string -> token list
(** [lex_comments x] splits [x] into a sequence of comments and non-comments
strings. *)
13 changes: 13 additions & 0 deletions lib/Migrate_ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ module Position = struct
include (val Comparator.make ~compare ~sexp_of_t)

let distance p1 p2 = p2.pos_cnum - p1.pos_cnum

let pp fs {pos_fname; pos_lnum; pos_bol; pos_cnum} =
Format.fprintf fs
"@[<2>{ pos_fname= %S;@;pos_lnum= %i;@;pos_bol= %i;@;pos_cnum= %i }@]"
pos_fname pos_lnum pos_bol pos_cnum
end

module Location = struct
Expand Down Expand Up @@ -110,6 +115,14 @@ module Location = struct
let min a b = if width a < width b then a else b in
List.reduce_exn (loc :: stack) ~f:min

let pp fs {loc_ghost; loc_start; loc_end} =
Format.fprintf fs
"@[<2>{ loc_ghost= %b;@;loc_start= %a;@;loc_end= %a }@]" loc_ghost
Position.pp loc_start Position.pp loc_end

let pp_loc f fs {txt; loc} =
Format.fprintf fs "@[<2>{ txt= %a;@;loc= %a }@]" f txt pp loc

let of_lexbuf (lexbuf : Lexing.lexbuf) =
{ loc_start= lexbuf.lex_start_p
; loc_end= lexbuf.lex_curr_p
Expand Down
3 changes: 3 additions & 0 deletions lib/Migrate_ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ module Location : sig

val is_single_line : t -> int -> bool

val pp_loc :
(Format.formatter -> 'a -> unit) -> Format.formatter -> 'a loc -> unit

val of_lexbuf : Lexing.lexbuf -> t

val print : Format.formatter -> t -> unit
Expand Down
222 changes: 222 additions & 0 deletions lib/Slicer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
(**************************************************************************)
(* *)
(* OCamlFormat *)
(* *)
(* Copyright (c) Facebook, Inc. and its affiliates. *)
(* *)
(* This source code is licensed under the MIT license found in *)
(* the LICENSE file in the root directory of this source tree. *)
(* *)
(**************************************************************************)

let find_semisemi_start ?pos txt =
String.substr_index_all ~may_overlap:false ~pattern:";;" txt
|> fun l ->
( match pos with
| Some pos -> List.filter l ~f:(fun x -> x <= pos - 2)
| None -> l )
|> List.last

let find_semisemi_end ?pos txt =
String.substr_index_all ~may_overlap:false ~pattern:";;" txt
|> fun l ->
( match pos with
| Some pos -> List.filter l ~f:(fun x -> x > pos)
| None -> l )
|> List.hd

let find_nl_start ?pos txt =
let followed_by_space x =
match String.get txt (x + 2) with
| exception _ -> true
| ' ' -> false
| _ -> true
in
String.substr_index_all ~may_overlap:false ~pattern:"\n\n" txt
|> fun l ->
( match pos with
| Some pos ->
List.filter l ~f:(fun x -> x <= pos - 2 && followed_by_space x)
| None -> List.filter l ~f:followed_by_space )
|> List.last

let find_nl_end ?pos txt =
let followed_by_space x =
match String.get txt (x + 2) with
| exception _ -> true
| ' ' -> false
| _ -> true
in
String.substr_index_all ~may_overlap:false ~pattern:"\n\n" txt
|> fun l ->
( match pos with
| Some pos -> List.filter l ~f:(fun x -> x > pos && followed_by_space x)
| None -> List.filter l ~f:followed_by_space )
|> List.hd

let sub_start i (loc : Location.t) =
String.sub ~pos:(i + 2)
~len:(loc.loc_end.pos_cnum - loc.loc_start.pos_cnum - i - 2)

let sub_end i = String.sub ~pos:0 ~len:(i - 1)

let find_start ?pos (lexed : Cmt_lexer.token array) ~start_i
~split_on_semisemi =
let rec aux ?pos i =
if i < 0 then
match Array.unsafe_get lexed 0 with
| Cmt {txt; _} -> (0, txt, 0, 0)
| S {txt; _} -> (0, txt, 0, 0)
else
let lex = Array.unsafe_get lexed i in
match lex with
| Cmt _ -> aux (i - 1)
| S {txt; loc} -> (
let ldiff max_cnum =
let rec aux lnum i =
if i >= max_cnum then lnum
else
match String.unsafe_get txt i with
| '\n' -> aux (lnum + 1) (i + 1)
| _ -> aux lnum (i + 1)
in
aux loc.loc_start.pos_lnum 0
in
if split_on_semisemi then
match find_semisemi_start ?pos txt with
| Some x -> (
match find_nl_start ?pos txt with
| Some y ->
let x = max x y in
let cnum = x + 2 in
let ldiff = ldiff cnum in
(i, sub_start x loc txt, cnum, ldiff)
| None ->
let cnum = x + 2 in
let ldiff = ldiff cnum in
(i, sub_start x loc txt, cnum, ldiff) )
| None -> (
match find_nl_start ?pos txt with
| Some x ->
let cnum = x + 2 in
let ldiff = ldiff cnum in
(i, sub_start x loc txt, cnum, ldiff)
| None -> aux (i - 1) )
else
match find_nl_start ?pos txt with
| Some x ->
let cnum = x + 2 in
let ldiff = ldiff cnum in
(i, sub_start x loc txt, cnum, ldiff)
| None -> aux (i - 1) )
in
aux ?pos start_i

let find_end ?pos (lexed : Cmt_lexer.token array) ~end_i ~split_on_semisemi =
let max_i = Array.length lexed - 1 in
let rec aux ?pos i =
if i > max_i then
match Array.unsafe_get lexed max_i with
| Cmt {txt; _} -> (max_i, txt, String.length txt - 1)
| S {txt; _} -> (max_i, txt, String.length txt - 1)
else
let lex = Array.unsafe_get lexed i in
match lex with
| Cmt _ -> aux (i + 1)
| S {txt; loc= _} -> (
if split_on_semisemi then
match find_semisemi_end ?pos txt with
| Some x -> (
match find_nl_end ?pos txt with
| Some y ->
let x = min x y in
(i, sub_end x txt, x - 1)
| None -> (i, sub_end x txt, x - 1) )
| None -> (
match find_nl_end ?pos txt with
| Some x -> (i, sub_end x txt, x - 1)
| None -> aux (i + 1) )
else
match find_nl_end ?pos txt with
| Some x -> (i, sub_end x txt, x - 1)
| None -> aux (i + 1) )
in
aux ?pos end_i

let split ~range:((low, high) as range) ~split_on_semisemi = function
| [] -> ("", range)
| lexed ->
let lexed : Cmt_lexer.token array = Array.of_list lexed in
let high =
match Array.unsafe_get lexed (Array.length lexed - 1) with
| Cmt {loc; _} | S {loc; _} ->
if high - 1 = loc.loc_end.pos_lnum + 1 then high - 1 else high
in
let range = (low, high) in
let start_i, start_lex =
Array.findi_exn lexed ~f:(fun _ -> function
| Cmt {loc; _} | S {loc; _} ->
loc.loc_start.pos_lnum <= low - 1
&& low - 1 <= loc.loc_end.pos_lnum )
in
let end_i, end_lex =
Array.findi_exn lexed ~f:(fun _ -> function
| Cmt {loc; _} | S {loc; _} ->
loc.loc_start.pos_lnum <= high - 1
&& high - 1 <= loc.loc_end.pos_lnum )
in
let start_i, start_lex, start_pos, ldiff =
match start_lex with
| S {txt; loc} ->
let rec aux ~lnum i =
if lnum = low - 1 then i
else
match String.unsafe_get txt i with
| '\n' -> aux ~lnum:(lnum + 1) (i + 1)
| _ -> aux ~lnum (i + 1)
in
let pos = aux 0 ~lnum:loc.loc_start.pos_lnum in
find_start ~pos ~start_i ~split_on_semisemi lexed
| Cmt _ -> find_start ~start_i ~split_on_semisemi lexed
in
let end_i, end_lex, end_pos =
match end_lex with
| S {txt; loc} ->
let rec aux ~lnum i =
if lnum = high - 1 then i
else
match String.unsafe_get txt i with
| '\n' -> aux ~lnum:(lnum + 1) (i + 1)
| _ -> aux ~lnum (i + 1)
in
let pos = aux 0 ~lnum:loc.loc_start.pos_lnum in
find_end ~pos ~end_i ~split_on_semisemi lexed
| Cmt _ -> find_end ~end_i ~split_on_semisemi lexed
in
if start_i = end_i then
match Array.unsafe_get lexed start_i with
| Cmt {txt; _} -> (txt, range)
| S {txt; _} ->
( String.sub txt ~pos:start_pos ~len:(end_pos - start_pos + 1)
, (low - ldiff, high - ldiff) )
else
let rec aux acc i =
if i = end_i then acc ^ end_lex
else
match Array.unsafe_get lexed i with
| Cmt {txt; _} -> aux (acc ^ txt) (i + 1)
| S {txt; _} -> aux (acc ^ txt) (i + 1)
in
(aux start_lex (start_i + 1), (low - ldiff, high - ldiff))

let fragment (type a) (fg : a Extended_ast.t) ~range input =
let split_on_semisemi =
match fg with
| Structure | Use_file -> true
| Signature -> false
| Core_type -> failwith "Slicer.fragment not implemented for Core_type"
| Module_type ->
failwith "Slicer.fragment not implemented for Module_type"
| Expression -> failwith "Slicer.fragment not implemented for Expression"
in
Cmt_lexer.lex_comments input |> split ~range ~split_on_semisemi
23 changes: 23 additions & 0 deletions lib/Slicer.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
(**************************************************************************)
(* *)
(* OCamlFormat *)
(* *)
(* Copyright (c) Facebook, Inc. and its affiliates. *)
(* *)
(* This source code is licensed under the MIT license found in *)
(* the LICENSE file in the root directory of this source tree. *)
(* *)
(**************************************************************************)

val split :
range:int * int
-> split_on_semisemi:bool
-> Cmt_lexer.token list
-> string * (int * int)
(** Exposed for tests. *)

val fragment :
'a Extended_ast.t -> range:int * int -> string -> string * (int * int)
(** [fragment fg ~range x] slices [x] into a smaller string, filtered
according to the range of lines [range], and containing the relevant
context so we can infer the indentation to apply to this range of lines. *)
Loading

0 comments on commit 3449dba

Please sign in to comment.