diff options
author | Alban Gruin | 2020-09-19 19:14:31 +0200 |
---|---|---|
committer | Alban Gruin | 2020-09-19 19:14:31 +0200 |
commit | 36c9cd7d082bb2077a910269702a13784ef10fbb (patch) | |
tree | 9373be551d52a9c3b71c4dcdd30d054f01fedc0d | |
parent | cd3d6caea294eb041e8635c5f62ec90027fee281 (diff) |
course: memoized version of `get_unicode'
To avoid creating buffers to decode an HTML entity, this creates a
memoized version of `get_unicode' in the hope of operating a bit faster and
reducing memory allocations.
Signed-off-by: Alban Gruin <alban at pa1ch dot fr>
-rw-r--r-- | src/course.ml | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/src/course.ml b/src/course.ml index 4d02df3..461c99c 100644 --- a/src/course.ml +++ b/src/course.ml @@ -19,18 +19,30 @@ open CalendarLib module J = Json_encoding +let memoize f = + let hashtbl = Hashtbl.create ~random:true 100 in + fun v -> + match Hashtbl.find_opt hashtbl v with + | Some r -> r + | None -> + let r = f v in + Hashtbl.add hashtbl v r; + r + let get_unicode v = let b = Buffer.create 1 in Buffer.add_utf_8_uchar b (Uchar.of_int v); Buffer.contents b +let memoized_get_unicode = memoize get_unicode + let html_entities_regex = Re.Perl.compile_pat "&#(\\d+);" let replace_entities str = Re.Pcre.full_split ~rex:html_entities_regex str |> List.filter_map (function | Re.Pcre.Group (_, v) -> - Some ("&#" ^ v ^ ";", get_unicode @@ int_of_string v) + Some ("&#" ^ v ^ ";", memoized_get_unicode @@ int_of_string v) | _ -> None) |> Stringext.replace_all_assoc str |