[Keymap] Unicode and Pointing Device and Autocorrect for drashna keymaps (#15415)

Drashna Jaelre 2021-12-14 20:53:36 -08:00 committed by GitHub
parent c10bc9f91e
commit 3fa592a402
45 changed files with 815 additions and 187 deletions

@@ -0,0 +1,143 @@
// Copyright 2021 Google LLC
// Copyright 2022 @filterpaper
// SPDX-License-Identifier: Apache-2.0
// Original source: https://getreuer.info/posts/keyboards/autocorrection
#include "autocorrection.h"
#include <string.h>
#if __has_include("autocorrection_data.h")
# include "autocorrection_data.h"
# if AUTOCORRECTION_MIN_LENGTH < 4
# error Minimum Length is too short and may cause overflows
# endif
bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
static uint8_t typo_buffer[AUTOCORRECTION_MAX_LENGTH] = {KC_SPC};
static uint8_t typo_buffer_size = 1;
if (keycode == AUTO_CTN) {
if (record->event.pressed) {
typo_buffer_size = 0;
userspace_config.autocorrection ^= 1;
eeconfig_update_user(userspace_config.raw);
}
return false;
}
if (!userspace_config.autocorrection) {
typo_buffer_size = 0;
return true;
}
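    // Ignore Shift in all its forms (plain, mod-tap, one-shot) so capitalized
    // typos still feed the buffer, unwrap mod-tap/layer-tap keys to their base
    // keycode once a tap has resolved on release, and otherwise only act on
    // key presses.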
switch (keycode) {
case KC_LSFT:
case KC_RSFT:
return true;
# ifndef NO_ACTION_TAPPING
case QK_MOD_TAP ... QK_MOD_TAP_MAX:
if (((keycode >> 8) & 0xF) == MOD_LSFT) {
return true;
}
# ifndef NO_ACTION_LAYER
case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
# endif
if (record->event.pressed || !record->tap.count) {
return true;
}
keycode &= 0xFF;
break;
# endif
# ifndef NO_ACTION_ONESHOT
case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
if ((keycode & 0xF) == MOD_LSFT) {
return true;
}
# endif
default:
if (!record->event.pressed) {
return true;
}
}
// Shrink the buffer on Backspace; reset it for any other non-alpha key.
if (!(KC_A <= keycode && keycode <= KC_Z)) {
if (keycode == KC_BSPC) {
// Remove last character from the buffer.
if (typo_buffer_size > 0) {
--typo_buffer_size;
}
return true;
} else if (KC_1 <= keycode && keycode <= KC_SLSH && keycode != KC_ESC) {
// Set a word boundary if space, period, digit, etc. is pressed.
// Behave more conservatively for the enter key. Reset, so that enter
// can't be used on a word ending.
if (keycode == KC_ENT) {
typo_buffer_size = 0;
}
keycode = KC_SPC;
} else {
// Clear state if some other non-alpha key is pressed.
typo_buffer_size = 0;
return true;
}
}
// Rotate oldest character if buffer is full.
if (typo_buffer_size >= AUTOCORRECTION_MAX_LENGTH) {
memmove(typo_buffer, typo_buffer + 1, AUTOCORRECTION_MAX_LENGTH - 1);
typo_buffer_size = AUTOCORRECTION_MAX_LENGTH - 1;
}
// Append `keycode` to buffer.
typo_buffer[typo_buffer_size++] = keycode;
// Return if buffer is smaller than the shortest word.
if (typo_buffer_size < AUTOCORRECTION_MIN_LENGTH) {
return true;
}
// Check for typo in buffer using a trie stored in `autocorrection_data`.
uint16_t state = 0;
uint8_t code = pgm_read_byte(autocorrection_data + state);
    for (int8_t i = typo_buffer_size - 1; i >= 0; --i) {
uint8_t const key_i = typo_buffer[i];
if (code & 64) { // Check for match in node with multiple children.
code &= 63;
for (; code != key_i; code = pgm_read_byte(autocorrection_data + (state += 3))) {
if (!code) return true;
}
// Follow link to child node.
state = (pgm_read_byte(autocorrection_data + state + 1) | pgm_read_byte(autocorrection_data + state + 2) << 8);
// Check for match in node with single child.
} else if (code != key_i) {
return true;
} else if (!(code = pgm_read_byte(autocorrection_data + (++state)))) {
++state;
}
code = pgm_read_byte(autocorrection_data + state);
if (code & 128) { // A typo was found! Apply autocorrection.
const uint8_t backspaces = code & 63;
for (uint8_t i = 0; i < backspaces; ++i) {
tap_code(KC_BSPC);
}
send_string_P((char const*)(autocorrection_data + state + 1));
if (keycode == KC_SPC) {
typo_buffer[0] = KC_SPC;
typo_buffer_size = 1;
return true;
} else {
typo_buffer_size = 0;
return false;
}
}
}
return true;
}
#else
#    pragma message "Warning!!! Autocorrect is not correctly set up: autocorrection_data.h was not found. Run make_autocorrection_data.py to generate it."
bool process_autocorrection(uint16_t keycode, keyrecord_t* record) { return true; }
#endif
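
For illustration, here is a minimal Python sketch of what a single dictionary entry boils down to and what the matcher above replays once a leaf is reached: keep the shared prefix, backspace over the rest (minus the final keypress, which is intercepted before it is ever output), then send the correction suffix. The pair "fitler -> filter" is taken from the example dictionary in the generator script below.

typo, correction = 'fitler', 'filter'

i = 0                                       # length of the shared prefix ("fi")
while i < min(len(typo), len(correction)) and typo[i] == correction[i]:
    i += 1

backspaces = len(typo) - i - 1              # the final 'r' press is never output
suffix = correction[i:]

on_screen = typo[:-1]                       # the host has only seen "fitle"
print(on_screen[:len(on_screen) - backspaces] + suffix)   # -> "filter"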

@@ -0,0 +1,10 @@
// Copyright 2021 Google LLC
// Copyright 2022 @filterpaper
// SPDX-License-Identifier: Apache-2.0
// Original source: https://getreuer.info/posts/keyboards/autocorrection
#pragma once
#include "drashna.h"
bool process_autocorrection(uint16_t keycode, keyrecord_t* record);

@@ -0,0 +1,273 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python program to make autocorrection_data.h.
This program reads "autocorrection_dict.txt" and generates a C source file
"autocorrection_data.h" with a serialized trie embedded as an array. Run this
program without arguments like
$ python3 make_autocorrection_data.py
Or to read from a different typo dict file, pass it as the first argument like
$ python3 make_autocorrection_data.py dict.txt
Each line of the dict file defines one typo and its correction with the syntax
"typo -> correction". Blank lines or lines starting with '#' are ignored.
Example:
:thier -> their
fitler -> filter
lenght -> length
ouput -> output
widht -> width
See autocorrection_dict_extra.txt for a larger example.
For full documentation, see
https://getreuer.info/posts/keyboards/autocorrection
"""
import sys
import textwrap
from typing import Any, Dict, List, Tuple
try:
from english_words import english_words_lower_alpha_set as CORRECT_WORDS
except ImportError:
print('Autocorrection will falsely trigger when a typo is a substring of a '
'correctly spelled word. To check for this, install the english_words '
'package and rerun this script:\n\n pip install english_words\n')
# Use a minimal word list as a fallback.
CORRECT_WORDS = ('information', 'available', 'international', 'language',
'loosest', 'reference', 'wealthier', 'entertainment',
'association', 'provides', 'technology', 'statehood')
KC_A = 4
KC_SPC = 0x2c
def parse_file(file_name: str) -> List[Tuple[str, str]]:
"""Parses autocorrections dictionary file.
Each line of the file defines one typo and its correction with the syntax
"typo -> correction". Blank lines or lines starting with '#' are ignored. The
function validates that typos only have characters a-z and that typos are not
substrings of other typos, otherwise the longer typo would never trigger.
Args:
file_name: String, path of the autocorrections dictionary.
Returns:
List of (typo, correction) tuples.
"""
autocorrections = []
typos = set()
line_number = 0
for line in open(file_name, 'rt'):
line_number += 1
line = line.strip()
if line and line[0] != '#':
# Parse syntax "typo -> correction", using strip to ignore indenting.
tokens = [token.strip() for token in line.split('->', 1)]
if len(tokens) != 2 or not tokens[0]:
print(f'Error:{line_number}: Invalid syntax: "{line}"')
sys.exit(1)
typo, correction = tokens
typo = typo.lower() # Force typos to lowercase.
typo = typo.replace(' ', ':')
if typo in typos:
print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
continue
# Check that `typo` is valid.
if not(all([ord('a') <= ord(c) <= ord('z') or c == ':' for c in typo])):
print(f'Error:{line_number}: Typo "{typo}" has '
'characters other than a-z and :.')
sys.exit(1)
for other_typo in typos:
if typo in other_typo or other_typo in typo:
print(f'Error:{line_number}: Typos may not be substrings of one '
f'another, otherwise the longer typo would never trigger: '
f'"{typo}" vs. "{other_typo}".')
sys.exit(1)
if len(typo) < 5:
print(f'Warning:{line_number}: It is suggested that typos are at '
f'least 5 characters long to avoid false triggers: "{typo}"')
if typo.startswith(':') and typo.endswith(':'):
if typo[1:-1] in CORRECT_WORDS:
print(f'Warning:{line_number}: Typo "{typo}" is a correctly spelled '
'dictionary word.')
elif typo.startswith(':') and not typo.endswith(':'):
for word in CORRECT_WORDS:
if word.startswith(typo[1:]):
print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".')
elif not typo.startswith(':') and typo.endswith(':'):
for word in CORRECT_WORDS:
if word.endswith(typo[:-1]):
print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".')
elif not typo.startswith(':') and not typo.endswith(':'):
for word in CORRECT_WORDS:
if typo in word:
print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".')
autocorrections.append((typo, correction))
typos.add(typo)
return autocorrections
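# Note on word boundaries: spaces in the dictionary file are converted to ':'
# above, and the matcher in autocorrection.c maps space, enter, digits and
# punctuation to KC_SPC. So ':thier' only triggers at the start of a word and
# 'thier:' only once the word has been finished with such a boundary key.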
def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
"""Makes a trie from the the typos, writing in reverse.
Args:
autocorrections: List of (typo, correction) tuples.
Returns:
Dict of dict, representing the trie.
"""
trie = {}
for typo, correction in autocorrections:
node = trie
for letter in typo[::-1]:
node = node.setdefault(letter, {})
node['LEAF'] = (typo, correction)
return trie
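# For example, the single entry ('thier', 'their') becomes the reversed chain
#   {'r': {'e': {'i': {'h': {'t': {'LEAF': ('thier', 'their')}}}}}}
# so the firmware can walk the key buffer from the most recent key backwards.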
def serialize_trie(autocorrections: List[Tuple[str, str]],
trie: Dict[str, Any]) -> List[int]:
"""Serializes trie and correction data in a form readable by the C code.
Args:
autocorrections: List of (typo, correction) tuples.
trie: Dict of dicts.
Returns:
List of ints in the range 0-255.
"""
table = []
# Traverse trie in depth first order.
def traverse(trie_node):
if 'LEAF' in trie_node: # Handle a leaf trie node.
typo, correction = trie_node['LEAF']
word_boundary_ending = typo[-1] == ':'
typo = typo.strip(':')
i = 0 # Make the autocorrection data for this entry and serialize it.
while i < min(len(typo), len(correction)) and typo[i] == correction[i]:
i += 1
backspaces = len(typo) - i - 1 + word_boundary_ending
assert 0 <= backspaces <= 63
correction = correction[i:]
data = [backspaces + 128] + list(bytes(correction, 'ascii')) + [0]
entry = {'data': data, 'links': [], 'byte_offset': 0}
table.append(entry)
elif len(trie_node) == 1: # Handle trie node with a single child.
c, trie_node = next(iter(trie_node.items()))
entry = {'chars': c, 'byte_offset': 0}
# It's common for a trie to have long chains of single-child nodes. We
# find the whole chain so that we can serialize it more efficiently.
while len(trie_node) == 1 and 'LEAF' not in trie_node:
c, trie_node = next(iter(trie_node.items()))
entry['chars'] += c
table.append(entry)
entry['links'] = [traverse(trie_node)]
else: # Handle trie node with multiple children.
entry = {'chars': ''.join(sorted(trie_node.keys())), 'byte_offset': 0}
table.append(entry)
entry['links'] = [traverse(trie_node[c]) for c in entry['chars']]
return entry
traverse(trie)
def serialize(e):
def kc_code(c):
if ord('a') <= ord(c) <= ord('z'):
return ord(c) - ord('a') + KC_A
elif c == ':':
return KC_SPC
else:
raise ValueError(f'Invalid character: {c}')
encode_link = lambda link: [link['byte_offset'] & 255,
link['byte_offset'] >> 8]
if not e['links']: # Handle a leaf table entry.
return e['data']
elif len(e['links']) == 1: # Handle a chain table entry.
      return list(map(kc_code, e['chars'])) + [0]  # the chain's only child follows immediately
else: # Handle a branch table entry.
data = []
for c, link in zip(e['chars'], e['links']):
data += [kc_code(c) | (0 if data else 64)] + encode_link(link)
return data + [0]
byte_offset = 0
for e in table: # To encode links, first compute byte offset of each entry.
e['byte_offset'] = byte_offset
byte_offset += len(serialize(e))
assert 0 <= byte_offset <= 0xffff
return [b for e in table for b in serialize(e)] # Serialize final table.
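# The resulting byte stream uses three node layouts, matching the reader in
# autocorrection.c:
#   leaf:   [128 | backspaces, ASCII correction suffix ..., 0]
#   chain:  [keycode, keycode, ..., 0]   (its only child follows right after the 0)
#   branch: [first_keycode | 64, offset_lo, offset_hi,
#            next_keycode, offset_lo, offset_hi, ..., 0]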
def write_generated_code(autocorrections: List[Tuple[str, str]],
data: List[int],
file_name: str) -> None:
"""Writes autocorrection data as generated C code to `file_name`.
Args:
autocorrections: List of (typo, correction) tuples.
data: List of ints in 0-255, the serialized trie.
file_name: String, path of the output C file.
"""
assert all(0 <= b <= 255 for b in data)
typo_len = lambda e: len(e[0])
min_typo = min(autocorrections, key=typo_len)[0]
max_typo = max(autocorrections, key=typo_len)[0]
generated_code = ''.join([
'// Generated code.\n\n',
f'// Autocorrection dictionary ({len(autocorrections)} entries):\n',
''.join(sorted(f'// {typo:<{len(max_typo)}} -> {correction}\n'
for typo, correction in autocorrections)),
f'\n#define AUTOCORRECTION_MIN_LENGTH {len(min_typo)} // "{min_typo}"\n',
f'#define AUTOCORRECTION_MAX_LENGTH {len(max_typo)} // "{max_typo}"\n\n',
textwrap.fill('static const uint8_t autocorrection_data[%d] PROGMEM = {%s};' % (
len(data), ', '.join(map(str, data))), width=80, subsequent_indent=' '),
'\n\n'])
with open(file_name, 'wt') as f:
f.write(generated_code)
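# The generated autocorrection_data.h looks roughly like this (entry count,
# lengths and array size depend on the input dictionary):
#   // Generated code.
#   // Autocorrection dictionary (N entries): ...
#   #define AUTOCORRECTION_MIN_LENGTH 5   // shortest typo
#   #define AUTOCORRECTION_MAX_LENGTH 10  // longest typo
#   static const uint8_t autocorrection_data[M] PROGMEM = {...};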
def main(argv):
dict_file = argv[1] if len(argv) > 1 else 'autocorrection_dict.txt'
autocorrections = parse_file(dict_file)
trie = make_trie(autocorrections)
data = serialize_trie(autocorrections, trie)
  print('Processed %d autocorrection entries to table with %d bytes.'
        % (len(autocorrections), len(data)))
write_generated_code(autocorrections, data, 'autocorrection_data.h')
if __name__ == '__main__':
main(sys.argv)
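
To tie the two halves together, here is a minimal host-side sketch that hand-serializes the single pair "thier -> their" (from the example dictionary, dropping the leading ':' word boundary for simplicity) in the node formats noted above, then walks the bytes the same way process_autocorrection does, using the same KC_A and KC_SPC values.

KC_A, KC_SPC = 4, 0x2C

def kc(c):
    # Same mapping as kc_code() above: a-z -> KC_A..KC_Z, ':' or space -> KC_SPC.
    return KC_SPC if c in ' :' else ord(c) - ord('a') + KC_A

# One chain node for the reversed typo "reiht", a 0 terminator, then the leaf:
# 0x80 | 2 backspaces, the ASCII suffix "eir", and a trailing 0.
DATA = [kc(c) for c in 'reiht'] + [0, 0x80 | 2] + list(b'eir') + [0]

def lookup(typed):
    """Mirror the C matcher: walk the typed keys newest-to-oldest through DATA."""
    keys = [kc(c) for c in typed]
    state = 0
    code = DATA[state]
    for key in reversed(keys):
        if code & 64:                  # branch node: scan (keycode, lo, hi) triples
            code &= 63
            while code != key:
                state += 3
                code = DATA[state]
                if not code:
                    return None        # no child matches this key
            state = DATA[state + 1] | (DATA[state + 2] << 8)
        elif code != key:              # chain node: bytes must match exactly
            return None
        else:
            state += 1
            if DATA[state] == 0:       # end of chain: the child node follows the 0
                state += 1
        code = DATA[state]
        if code & 128:                 # leaf: backspace count, then correction text
            suffix = bytes(DATA[state + 1:DATA.index(0, state + 1)]).decode()
            return code & 63, suffix
    return None

backspaces, suffix = lookup('thier')
print(backspaces, suffix)              # -> 2 eir
print('thie'[:-backspaces] + suffix)   # the final 'r' is intercepted -> "their"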