#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Split a separator-delimited text export read from stdin into Markdown files.

Usage: ``fromtext.py [TARGET_DIR] < export.txt``

Leading lines that start with ``#`` are treated as a header and skipped.
Every following record must have a fixed number of columns; a record may
span several physical lines when one of its fields contains line breaks.
For each record the selected columns are written to ``<slug>.<n>.md``
inside ``TARGET_DIR`` (default: the current directory), where the slug is
derived from the first selected column.

This script supersedes the earlier ``fromtext.sh`` shell implementation
(commit add536d1d08f2743393864d456fa9ce567a84fed, redxef, 2025-03-11:
"fix: use better python implementation for import from text file.").
"""

import itertools
import os
import pathlib
import sys
from collections.abc import Iterable, Iterator, Sequence

# File stem used when the first selected column contains no usable characters.
_FALLBACK_SLUG = 'untitled'


def _warn(message: str) -> None:
    """Print a diagnostic to stderr."""
    print(f'fromtext: {message}', file=sys.stderr)


def _sanitize_line(l: str) -> str:
    """Undo the quoting of a multi-line field.

    Only a field that contains a line break AND is wrapped in double quotes
    is treated as quoted: the wrapping quotes are removed and every doubled
    quote (``""``) collapses to a single one. Any other field is returned
    unchanged.
    """
    if '\n' in l and l.startswith('"') and l.endswith('"'):
        return l[1:-1].replace('""', '"')
    return l


def process_line(
    *,
    columns: list[str],
    columns_indices: Sequence[int],
    target_dir: pathlib.Path,
) -> None:
    """Write the selected columns of one record to ``<slug>.<n>.md`` files.

    Args:
        columns: All raw columns of the record.
        columns_indices: Indices into ``columns`` of the fields to export,
            in output order. The first one also provides the file name.
        target_dir: Existing directory that receives the files.

    Raises:
        IndexError: If an index is out of range or no index is given.
        OSError: If a file cannot be written.
    """
    selected = [_sanitize_line(columns[i]) for i in columns_indices]

    # The slug keeps lower-cased alphanumerics only; spaces become dashes.
    kept = ''.join(c for c in selected[0].lower() if c.isalnum() or c == ' ')
    slug = kept.replace(' ', '-')
    if not slug:
        # An empty name would make ``target_dir / slug`` collapse to
        # ``target_dir`` itself, and the file would then be created next to
        # the target directory instead of inside it.
        slug = _FALLBACK_SLUG

    for i, text in enumerate(selected):
        (target_dir / f'{slug}.{i}.md').write_text(text, encoding='utf-8')


def _iter_records(
    numbered_lines: Iterable[tuple[int, str]],
    *,
    target_column_count: int,
    sep: str,
) -> Iterator[list[str]]:
    """Group physical lines into records of exactly ``target_column_count`` columns.

    Lines are accumulated until the buffered text splits into the expected
    number of columns. Input that cannot form a record is reported on stderr
    and skipped, so one malformed line cannot swallow the rest of the input.
    """
    pending = ''
    pending_start = 0
    for lineno, line in numbered_lines:
        candidate = pending + line
        columns = candidate.split(sep)

        if len(columns) > target_column_count and pending:
            # The buffered fragment can never complete: drop it and retry
            # with the current line as the start of a fresh record.
            _warn(
                f'discarding incomplete record starting at line '
                f'{pending_start}'
            )
            pending = ''
            candidate = line
            columns = candidate.split(sep)

        if len(columns) > target_column_count:
            _warn(
                f'line {lineno} has {len(columns)} columns, expected '
                f'{target_column_count}; skipped'
            )
            pending = ''
            continue

        if len(columns) == target_column_count:
            yield columns
            pending = ''
        else:
            if not pending:
                pending_start = lineno
            pending = candidate

    if pending:
        _warn(
            f'discarding incomplete record starting at line {pending_start} '
            f'(end of input)'
        )


def main(
    *,
    target_column_count: int = 6,
    columns_indices: Sequence[int] = (3, 4),
    sep: str = '\t',
) -> None:
    """Read the export from stdin and write one set of files per record.

    The target directory is taken from the first command-line argument and
    defaults to the current directory.

    Args:
        target_column_count: Number of columns every record must have.
        columns_indices: Indices of the columns to export (see
            ``process_line``).
        sep: Column separator.
    """
    target_dir = pathlib.Path(sys.argv[1] if len(sys.argv) > 1 else '.')

    # Stream stdin instead of loading it whole; only the leading run of
    # '#' lines is header, a later '#' line is ordinary data.
    numbered = enumerate(sys.stdin, start=1)
    body = itertools.dropwhile(lambda item: item[1].startswith('#'), numbered)

    for columns in _iter_records(
        body,
        target_column_count=target_column_count,
        sep=sep,
    ):
        process_line(
            columns=columns,
            columns_indices=columns_indices,
            target_dir=target_dir,
        )


if __name__ == '__main__':
    main()