diff options
author | Jelle van der Waa <jelle@archlinux.org> | 2020-10-19 22:43:23 +0200 |
---|---|---|
committer | jelle van der Waa <jelle@vdwaa.nl> | 2020-10-26 21:28:21 +0100 |
commit | 17fd534a4f3759bbb37dceebf193a8f218ca0e03 (patch) | |
tree | b29c0b87aa68471ff74cec840661feb475612bdd | |
parent | 1b78d0ad54a41400925e04689fe6bb066eba3eff (diff) | |
download | archweb-17fd534a4f3759bbb37dceebf193a8f218ca0e03.tar.gz archweb-17fd534a4f3759bbb37dceebf193a8f218ca0e03.zip |
Add core.db.tar.zst support for reporead
As Python does not support zstd compression yet, xtarfile, a wrapper
around tarfile with zstd support, is required.
-rw-r--r-- | devel/management/commands/reporead.py | 53 | ||||
-rw-r--r-- | requirements.txt | 2 |
2 files changed, 28 insertions, 27 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 4de002b3..ba80ebe5 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -20,7 +20,7 @@ import io import os import re import sys -import tarfile +import xtarfile as tarfile import logging from datetime import datetime from pytz import utc @@ -550,33 +550,32 @@ def parse_repo(repopath): logger.error("File does not have the proper extension") raise Exception("File does not have the proper extension") - repodb = tarfile.open(repopath, "r") - logger.debug("Starting package parsing") - newpkg = lambda: RepoPackage(reponame) - pkgs = defaultdict(newpkg) - for tarinfo in repodb.getmembers(): - if tarinfo.isreg(): - pkgid, fname = os.path.split(tarinfo.name) - if fname == 'files': - # don't parse yet for speed and memory consumption reasons - files_data = repodb.extractfile(tarinfo) - pkgs[pkgid].files = files_data.read() - del files_data - elif fname in ('desc', 'depends'): - data_file = repodb.extractfile(tarinfo) - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='UTF-8') - try: - pkgs[pkgid].populate(parse_info(data_file)) - except UnicodeDecodeError: - logger.warning("Could not correctly decode %s, skipping file", - tarinfo.name) - data_file.close() - del data_file - - logger.debug("Done parsing file %s/%s", pkgid, fname) + with tarfile.open(repopath, 'r') as repodb: + logger.debug("Starting package parsing") + newpkg = lambda: RepoPackage(reponame) + pkgs = defaultdict(newpkg) + for tarinfo in repodb.getmembers(): + if tarinfo.isreg(): + pkgid, fname = os.path.split(tarinfo.name) + if fname == 'files': + # don't parse yet for speed and memory consumption reasons + files_data = repodb.extractfile(tarinfo) + pkgs[pkgid].files = files_data.read() + del files_data + elif fname in ('desc', 'depends'): + data_file = repodb.extractfile(tarinfo) + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + 
encoding='UTF-8') + try: + pkgs[pkgid].populate(parse_info(data_file)) + except UnicodeDecodeError: + logger.warning("Could not correctly decode %s, skipping file", + tarinfo.name) + data_file.close() + del data_file + + logger.debug("Done parsing file %s/%s", pkgid, fname) - repodb.close() logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) diff --git a/requirements.txt b/requirements.txt index e8488f50..ffff37d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,5 @@ ptpython==2.0.4 feedparser==6.0.1 bleach==3.2.0 requests==2.24.0 +xtarfile==0.0.4 +zstandard==0.14.0 |