fix: use better python implementation for import from text file.

This commit is contained in:
redxef 2025-03-11 18:40:01 +01:00
parent e18fda87ca
commit add536d1d0
Signed by: redxef
GPG key ID: 7DAC3AA211CBD921
2 changed files with 50 additions and 22 deletions

50
fromtext.py Executable file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import pathlib
def _sanitize_line(l: str) -> str:
    """Unwrap a quoted multi-line field.

    A value counts as quoted only when it contains a newline and both
    starts and ends with a double quote. In that case the outer quotes are
    removed and every doubled quote (``""``) is collapsed to a single one.
    Any other value is returned unchanged.
    """
    wrapped_in_quotes = l.startswith('"') and l.endswith('"')
    if not ('\n' in l and wrapped_in_quotes):
        return l
    inner = l[1:-1]
    return inner.replace('""', '"')
def process_line(
    *,
    columns: list[str],
    columns_indices: list[int],
    target_dir: pathlib.Path,
) -> None:
    """Write the selected columns of one record to numbered markdown files.

    Every column is first passed through ``_sanitize_line``; then the
    columns named by ``columns_indices`` are picked, in that order. The
    first picked column is turned into a slug (lower-cased, only
    alphanumeric characters and spaces kept, spaces replaced by ``-``) and
    the n-th picked column is written to ``<target_dir>/<slug>.<n>.md``.
    Existing files with the same name are overwritten.

    Args:
        columns: All raw column values of the record.
        columns_indices: Indices into ``columns`` selecting what to write;
            the first index also selects the column the slug is built from.
        target_dir: Directory the files are written into.

    Raises:
        IndexError: If an index is out of range for ``columns`` or
            ``columns_indices`` is empty.
        OSError: If a file cannot be opened for writing.
    """
    cleaned = [_sanitize_line(column) for column in columns]
    selected = [cleaned[index] for index in columns_indices]
    slug = ''.join(
        char for char in selected[0].lower() if char.isalnum() or char == ' '
    )
    slug = slug.replace(' ', '-')
    for number, text in enumerate(selected):
        # Build the file name as one path component directly below
        # target_dir. Joining target_dir with the bare slug first and then
        # taking .parent/.name breaks when the slug is empty: the join
        # collapses to target_dir itself, so the file would be created
        # next to the target directory instead of inside it. With an empty
        # slug the file is now simply named ".<n>.md" inside target_dir.
        (target_dir / f'{slug}.{number}.md').write_text(text)
def main(
    *,
    target_column_count: int = 6,
    columns_indices: 'list[int] | None' = None,
    sep: str = '\t',
) -> None:
    """Read separated records from stdin and write selected columns to files.

    The target directory is the first command-line argument, or the current
    directory when none is given. Leading lines starting with ``#`` are
    treated as a header and skipped; the first line that does not start
    with ``#`` ends the header. Input lines are accumulated until the
    buffer splits into exactly ``target_column_count`` columns, which lets
    a record span several physical lines; the complete record is then
    handed to ``process_line``.

    Args:
        target_column_count: Number of columns that make up one record.
        columns_indices: Indices of the columns to write out. Defaults to
            ``[3, 4]``.
        sep: Column separator.
    """
    if columns_indices is None:
        # None sentinel instead of a shared mutable list default.
        columns_indices = [3, 4]
    target_dir = pathlib.Path(sys.argv[1] if len(sys.argv) > 1 else '.')
    buff = ''
    in_header = True
    # Iterate the stream lazily rather than loading all of stdin at once.
    # Lines keep their trailing newline, so the last column of a record
    # still ends with it.
    for line in sys.stdin:
        if in_header and line.startswith('#'):
            continue
        in_header = False
        buff += line
        columns = buff.split(sep)
        if len(columns) < target_column_count:
            # Record is not complete yet; it continues on the next line.
            continue
        if len(columns) == target_column_count:
            process_line(
                columns=columns,
                columns_indices=columns_indices,
                target_dir=target_dir,
            )
        else:
            # The column count can only grow while buffering, so without a
            # reset an over-long record would swallow all remaining input.
            print(
                f'warning: dropping record with {len(columns)} columns '
                f'(expected {target_column_count})',
                file=sys.stderr,
            )
        buff = ''
    if buff.strip():
        # Input ended in the middle of a record.
        print(
            'warning: dropping incomplete record at end of input',
            file=sys.stderr,
        )
# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

View file

@ -1,22 +0,0 @@
#!/bin/sh
# Read separated records from stdin and write the selected columns of each
# record to "<slug>.<n>.md" files in the current directory.

# Field separator handed to awk via -F.
SEP='\t'
# 1-based awk field numbers of the columns to export; the first non-empty
# one also provides the file name slug.
COLUMNS='4 5'

main() {
    # IFS= stops read from trimming leading/trailing tabs and spaces, which
    # would shift the field numbers when the first column is empty. The
    # "|| [ -n ... ]" part keeps a final line that has no trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        filename=
        i=0
        for c in $COLUMNS; do
            # printf instead of echo: echo may interpret backslash escapes
            # or a leading "-n" in the data, depending on the shell.
            f="$(printf '%s\n' "$line" | awk -F"$SEP" "{print \$$c}")"
            # Empty columns are skipped and do not consume a file number.
            [ -z "$f" ] && continue
            if [ "$filename" = '' ]; then
                # Slug: lower-case, spaces to dashes, drop everything except
                # a-z, 0-9 and "-", then cap the length at 128 characters.
                filename="$(printf '%s\n' "$f" | tr '[:upper:]' '[:lower:]' | sed -E -e 's/ /-/g' -e 's/[^a-z0-9-]//g' -n -e 's/^(.{0,128}).*$/\1/p')"
            fi
            printf '%s\n' "$f" > "$filename.$i.md"
            i=$((i+1))
        done
    done
}

main "$@"