aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlban Gruin2020-09-19 19:14:31 +0200
committerAlban Gruin2020-09-19 19:14:31 +0200
commit36c9cd7d082bb2077a910269702a13784ef10fbb (patch)
tree9373be551d52a9c3b71c4dcdd30d054f01fedc0d
parentcd3d6caea294eb041e8635c5f62ec90027fee281 (diff)
course: memoized version of `get_unicode'
To avoid creating a buffer each time an HTML entity is decoded, this creates a memoized version of `get_unicode', in the hope of running a bit faster and reducing memory allocations. Signed-off-by: Alban Gruin <alban at pa1ch dot fr>
-rw-r--r--src/course.ml14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/course.ml b/src/course.ml
index 4d02df3..461c99c 100644
--- a/src/course.ml
+++ b/src/course.ml
@@ -19,18 +19,30 @@ open CalendarLib
module J = Json_encoding
(* [memoize f] returns a function that behaves like [f] but keeps every
   result in a hash table keyed by the argument: a later call with an
   argument already present in the table returns the stored result without
   calling [f] again.  The table belongs to the returned closure and no entry
   is ever removed from it.  If [f v] raises, nothing is stored for [v]. *)
+let memoize f =
+ let hashtbl = Hashtbl.create ~random:true 100 in (* 100 is only the initial size *)
+ fun v ->
+ match Hashtbl.find_opt hashtbl v with
+ | Some r -> r
+ | None ->
+ let r = f v in
+ Hashtbl.add hashtbl v r; (* reached only when [f v] returned normally *)
+ r
+
(** [get_unicode v] returns the UTF-8 encoding of the code point [v].

    [Uchar.of_int] raises [Invalid_argument] when [v] is not a Unicode scalar
    value (a negative number, a surrogate in 0xD800-0xDFFF, or anything above
    0x10FFFF).  Such values are encoded as U+FFFD REPLACEMENT CHARACTER
    instead, so a numeric entity such as [&#55296;] does not make this
    function raise. *)
let get_unicode v =
  let uchar = if Uchar.is_valid v then Uchar.of_int v else Uchar.rep in
  (* A UTF-8 sequence is at most 4 bytes long, so the buffer never grows. *)
  let b = Buffer.create 4 in
  Buffer.add_utf_8_uchar b uchar;
  Buffer.contents b
(* Caching variant of [get_unicode], built with [memoize].  The cache is
   created once, when this binding is evaluated, and is shared by every
   later call. *)
+let memoized_get_unicode = memoize get_unicode
+
(* Matches decimal numeric character references such as [&#233;]; group 1
   captures the digits.  Hexadecimal ([&#x...;]) and named entities are not
   matched by this pattern. *)
let html_entities_regex = Re.Perl.compile_pat "&#(\\d+);"
(* [replace_entities str] returns [str] with each decimal numeric entity
   matched by [html_entities_regex] replaced by the UTF-8 string for its code
   point.  It builds an association list of (entity text, replacement) pairs
   and hands it to [Stringext.replace_all_assoc].
   NOTE(review): [int_of_string] raises [Failure] when the digits do not fit
   in an [int] -- confirm whether callers expect that. *)
let replace_entities str =
Re.Pcre.full_split ~rex:html_entities_regex str
|> List.filter_map (function
| Re.Pcre.Group (_, v) -> (* [v] is the captured digit run; the entity text is rebuilt around it *)
- Some ("&#" ^ v ^ ";", get_unicode @@ int_of_string v)
+ Some ("&#" ^ v ^ ";", memoized_get_unicode @@ int_of_string v)
| _ -> None) (* every other split item is dropped *)
|> Stringext.replace_all_assoc str