From 17fd534a4f3759bbb37dceebf193a8f218ca0e03 Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Mon, 19 Oct 2020 22:43:23 +0200 Subject: Add core.db.tar.zst support for reporead As Python does not support zstd compression yet, xtarfile, a wrapper around tarfile with zstd support, is required. --- devel/management/commands/reporead.py | 53 +++++++++++++++++------------------ requirements.txt | 2 ++ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 4de002b3..ba80ebe5 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -20,7 +20,7 @@ import io import os import re import sys -import tarfile +import xtarfile as tarfile import logging from datetime import datetime from pytz import utc @@ -550,33 +550,32 @@ def parse_repo(repopath): logger.error("File does not have the proper extension") raise Exception("File does not have the proper extension") - repodb = tarfile.open(repopath, "r") - logger.debug("Starting package parsing") - newpkg = lambda: RepoPackage(reponame) - pkgs = defaultdict(newpkg) - for tarinfo in repodb.getmembers(): - if tarinfo.isreg(): - pkgid, fname = os.path.split(tarinfo.name) - if fname == 'files': - # don't parse yet for speed and memory consumption reasons - files_data = repodb.extractfile(tarinfo) - pkgs[pkgid].files = files_data.read() - del files_data - elif fname in ('desc', 'depends'): - data_file = repodb.extractfile(tarinfo) - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='UTF-8') - try: - pkgs[pkgid].populate(parse_info(data_file)) - except UnicodeDecodeError: - logger.warning("Could not correctly decode %s, skipping file", - tarinfo.name) - data_file.close() - del data_file - - logger.debug("Done parsing file %s/%s", pkgid, fname) + with tarfile.open(repopath, 'r') as repodb: + logger.debug("Starting package parsing") + newpkg = lambda: RepoPackage(reponame) + pkgs = 
defaultdict(newpkg) + for tarinfo in repodb.getmembers(): + if tarinfo.isreg(): + pkgid, fname = os.path.split(tarinfo.name) + if fname == 'files': + # don't parse yet for speed and memory consumption reasons + files_data = repodb.extractfile(tarinfo) + pkgs[pkgid].files = files_data.read() + del files_data + elif fname in ('desc', 'depends'): + data_file = repodb.extractfile(tarinfo) + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='UTF-8') + try: + pkgs[pkgid].populate(parse_info(data_file)) + except UnicodeDecodeError: + logger.warning("Could not correctly decode %s, skipping file", + tarinfo.name) + data_file.close() + del data_file + + logger.debug("Done parsing file %s/%s", pkgid, fname) - repodb.close() logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) diff --git a/requirements.txt b/requirements.txt index e8488f50..ffff37d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,5 @@ ptpython==2.0.4 feedparser==6.0.1 bleach==3.2.0 requests==2.24.0 +xtarfile==0.0.4 +zstandard==0.14.0 -- cgit v1.2.3-55-g3dc8