Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Move some code to new files for reuse #3434

Merged
merged 2 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 3 additions & 62 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
Tuple,
)

from ._spellchecker import Misspelling, build_dict
from ._text_util import fix_case

# autogenerated by setuptools_scm
from ._version import ( # type: ignore[import-not-found]
__version__ as VERSION, # noqa: N812
Expand All @@ -52,9 +55,6 @@
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
)
# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
# Each entry is a (character, replacement) pair. build_dict() turns every
# pair into a str.maketrans() table and, for each dictionary key containing
# the first character, also registers the translated key and data.
# The single pair below maps the ASCII apostrophe to the right single
# quotation mark (U+2019).
alt_chars = (("'", "’"),) # noqa: RUF001
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
Expand Down Expand Up @@ -167,13 +167,6 @@ def match(self, filename: str) -> bool:
return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)


class Misspelling:
    """Plain record holding the three fields of one dictionary entry.

    Attributes are stored exactly as passed in:
        data: the replacement text for the entry.
        fix: boolean flag given by the caller.
        reason: free-form text given by the caller (may be empty).
    """

    def __init__(self, data: str, fix: bool, reason: str) -> None:
        self.data, self.fix, self.reason = data, fix, reason


class TermColors:
def __init__(self) -> None:
self.FILE = "\033[33m"
Expand Down Expand Up @@ -703,48 +696,6 @@ def build_ignore_words(
)


def add_misspelling(
    key: str,
    data: str,
    misspellings: Dict[str, Misspelling],
) -> None:
    """Store one entry under ``key`` in ``misspellings``.

    ``data`` is stripped of surrounding whitespace first. If the stripped
    text contains a comma, everything before the last comma becomes the
    entry's data, everything after it (leading whitespace removed) becomes
    the reason, and the entry is stored with ``fix=False``. Without a comma
    the whole stripped text is the data, ``fix`` is True and the reason is
    empty. An existing entry for ``key`` is overwritten.
    """
    stripped = data.strip()
    # rpartition yields an empty separator when no comma is present.
    corrections, separator, trailing = stripped.rpartition(",")
    if separator:
        entry = Misspelling(corrections, False, trailing.lstrip())
    else:
        entry = Misspelling(stripped, True, "")
    misspellings[key] = entry


def build_dict(
    filename: str,
    misspellings: Dict[str, Misspelling],
    ignore_words: Set[str],
) -> None:
    """Read the dictionary file ``filename`` into ``misspellings``.

    Every line is split on ``"->"`` into a key and its data, and both are
    lower-cased. The entry is registered through ``add_misspelling`` unless
    the key is in ``ignore_words``. For each pair in ``alt_chars`` whose
    first character occurs in the key, the translated key/data pair is
    registered as well, again unless the translated key is ignored.

    Raises:
        ValueError: if a line does not split into exactly two parts
            on ``"->"``.
    """
    with open(filename, encoding="utf-8") as dict_file:
        alternates = [(src, str.maketrans(src, dst)) for src, dst in alt_chars]
        for entry in dict_file:
            typo, correction = entry.split("->")
            # TODO: For now, convert both to lower.
            #       Someday we can maybe add support for fixing caps.
            typo = typo.lower()
            correction = correction.lower()

            # The entry itself comes first, followed by its alternative
            # spellings, so later variants overwrite in the same order
            # as before.
            variants = [(typo, correction)]
            variants.extend(
                (typo.translate(table), correction.translate(table))
                for char, table in alternates
                if char in typo
            )
            for variant_key, variant_data in variants:
                if variant_key not in ignore_words:
                    add_misspelling(variant_key, variant_data, misspellings)


def is_hidden(filename: str, check_hidden: bool) -> bool:
bfilename = os.path.basename(filename)

Expand All @@ -759,16 +710,6 @@ def is_text_file(filename: str) -> bool:
return b"\x00" not in s


def fix_case(word: str, fixword: str) -> str:
    """Return ``fixword`` re-cased to follow the casing of ``word``.

    If ``word`` equals its own ``capitalize()`` form, every comma-separated
    piece of ``fixword`` is stripped, capitalized and re-joined with
    ``", "``. Otherwise, if ``word`` is all upper case, ``fixword`` is
    upper-cased as a whole. In every other case ``fixword`` is returned
    unchanged.
    """
    if word == word.capitalize():
        pieces = [piece.strip().capitalize() for piece in fixword.split(",")]
        return ", ".join(pieces)
    # Either both are lower case already, or the casing is mixed and there
    # is no sensible way to guess: leave the fix untouched.
    return fixword.upper() if word == word.upper() else fixword


def ask_for_word_fix(
line: str,
match: Match[str],
Expand Down
75 changes: 75 additions & 0 deletions codespell_lib/_spellchecker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see
# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
"""
Copyright (C) 2010-2011 Lucas De Marchi <[email protected]>
Copyright (C) 2011 ProFUSION embedded systems
"""

from typing import (
Dict,
Set,
)

# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
# Each entry is a (character, replacement) pair. build_dict() turns every
# pair into a str.maketrans() table and, for each dictionary key containing
# the first character, also registers the translated key and data.
# The single pair below maps the ASCII apostrophe to the right single
# quotation mark (U+2019).
alt_chars = (("'", "’"),) # noqa: RUF001


class Misspelling:
    """Plain record holding the three fields of one dictionary entry.

    Attributes are stored exactly as passed in:
        data: the replacement text for the entry.
        fix: boolean flag given by the caller.
        reason: free-form text given by the caller (may be empty).
    """

    def __init__(self, data: str, fix: bool, reason: str) -> None:
        self.data, self.fix, self.reason = data, fix, reason


def add_misspelling(
    key: str,
    data: str,
    misspellings: Dict[str, Misspelling],
) -> None:
    """Store one entry under ``key`` in ``misspellings``.

    ``data`` is stripped of surrounding whitespace first. If the stripped
    text contains a comma, everything before the last comma becomes the
    entry's data, everything after it (leading whitespace removed) becomes
    the reason, and the entry is stored with ``fix=False``. Without a comma
    the whole stripped text is the data, ``fix`` is True and the reason is
    empty. An existing entry for ``key`` is overwritten.
    """
    stripped = data.strip()
    # rpartition yields an empty separator when no comma is present.
    corrections, separator, trailing = stripped.rpartition(",")
    if separator:
        entry = Misspelling(corrections, False, trailing.lstrip())
    else:
        entry = Misspelling(stripped, True, "")
    misspellings[key] = entry


def build_dict(
    filename: str,
    misspellings: Dict[str, Misspelling],
    ignore_words: Set[str],
) -> None:
    """Read the dictionary file ``filename`` into ``misspellings``.

    Every line is split on ``"->"`` into a key and its data, and both are
    lower-cased. The entry is registered through ``add_misspelling`` unless
    the key is in ``ignore_words``. For each pair in ``alt_chars`` whose
    first character occurs in the key, the translated key/data pair is
    registered as well, again unless the translated key is ignored.

    Raises:
        ValueError: if a line does not split into exactly two parts
            on ``"->"``.
    """
    with open(filename, encoding="utf-8") as dict_file:
        alternates = [(src, str.maketrans(src, dst)) for src, dst in alt_chars]
        for entry in dict_file:
            typo, correction = entry.split("->")
            # TODO: For now, convert both to lower.
            #       Someday we can maybe add support for fixing caps.
            typo = typo.lower()
            correction = correction.lower()

            # The entry itself comes first, followed by its alternative
            # spellings, so later variants overwrite in the same order
            # as before.
            variants = [(typo, correction)]
            variants.extend(
                (typo.translate(table), correction.translate(table))
                for char, table in alternates
                if char in typo
            )
            for variant_key, variant_data in variants:
                if variant_key not in ignore_words:
                    add_misspelling(variant_key, variant_data, misspellings)
27 changes: 27 additions & 0 deletions codespell_lib/_text_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see
# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
"""
Copyright (C) 2010-2011 Lucas De Marchi <[email protected]>
Copyright (C) 2011 ProFUSION embedded systems
"""


def fix_case(word: str, fixword: str) -> str:
    """Return ``fixword`` re-cased to follow the casing of ``word``.

    If ``word`` equals its own ``capitalize()`` form, every comma-separated
    piece of ``fixword`` is stripped, capitalized and re-joined with
    ``", "``. Otherwise, if ``word`` is all upper case, ``fixword`` is
    upper-cased as a whole. In every other case ``fixword`` is returned
    unchanged.
    """
    if word == word.capitalize():
        pieces = [piece.strip().capitalize() for piece in fixword.split(",")]
        return ", ".join(pieces)
    # Either both are lower case already, or the casing is mixed and there
    # is no sensible way to guess: leave the fix untouched.
    return fixword.upper() if word == word.upper() else fixword
Loading