summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jelle van der Waa <jelle@archlinux.org> 2020-10-19 22:43:23 +0200
committer: jelle van der Waa <jelle@vdwaa.nl> 2020-10-26 21:28:21 +0100
commit: 17fd534a4f3759bbb37dceebf193a8f218ca0e03 (patch)
tree: b29c0b87aa68471ff74cec840661feb475612bdd
parent: 1b78d0ad54a41400925e04689fe6bb066eba3eff (diff)
downloadarchweb-17fd534a4f3759bbb37dceebf193a8f218ca0e03.tar.gz
archweb-17fd534a4f3759bbb37dceebf193a8f218ca0e03.zip
Add core.db.tar.zst support for reporead
As Python does not support zstd compression yet, xtarfile, a wrapper around tarfile with zstd support, is required.
-rw-r--r-- devel/management/commands/reporead.py 53
-rw-r--r-- requirements.txt 2
2 files changed, 28 insertions, 27 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 4de002b3..ba80ebe5 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -20,7 +20,7 @@ import io
import os
import re
import sys
-import tarfile
+import xtarfile as tarfile
import logging
from datetime import datetime
from pytz import utc
@@ -550,33 +550,32 @@ def parse_repo(repopath):
logger.error("File does not have the proper extension")
raise Exception("File does not have the proper extension")
- repodb = tarfile.open(repopath, "r")
- logger.debug("Starting package parsing")
- newpkg = lambda: RepoPackage(reponame)
- pkgs = defaultdict(newpkg)
- for tarinfo in repodb.getmembers():
- if tarinfo.isreg():
- pkgid, fname = os.path.split(tarinfo.name)
- if fname == 'files':
- # don't parse yet for speed and memory consumption reasons
- files_data = repodb.extractfile(tarinfo)
- pkgs[pkgid].files = files_data.read()
- del files_data
- elif fname in ('desc', 'depends'):
- data_file = repodb.extractfile(tarinfo)
- data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
- encoding='UTF-8')
- try:
- pkgs[pkgid].populate(parse_info(data_file))
- except UnicodeDecodeError:
- logger.warning("Could not correctly decode %s, skipping file",
- tarinfo.name)
- data_file.close()
- del data_file
-
- logger.debug("Done parsing file %s/%s", pkgid, fname)
+ with tarfile.open(repopath, 'r') as repodb:
+ logger.debug("Starting package parsing")
+ newpkg = lambda: RepoPackage(reponame)
+ pkgs = defaultdict(newpkg)
+ for tarinfo in repodb.getmembers():
+ if tarinfo.isreg():
+ pkgid, fname = os.path.split(tarinfo.name)
+ if fname == 'files':
+ # don't parse yet for speed and memory consumption reasons
+ files_data = repodb.extractfile(tarinfo)
+ pkgs[pkgid].files = files_data.read()
+ del files_data
+ elif fname in ('desc', 'depends'):
+ data_file = repodb.extractfile(tarinfo)
+ data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
+ encoding='UTF-8')
+ try:
+ pkgs[pkgid].populate(parse_info(data_file))
+ except UnicodeDecodeError:
+ logger.warning("Could not correctly decode %s, skipping file",
+ tarinfo.name)
+ data_file.close()
+ del data_file
+
+ logger.debug("Done parsing file %s/%s", pkgid, fname)
- repodb.close()
logger.info("Finished repo parsing, %d total packages", len(pkgs))
return (reponame, pkgs.values())
diff --git a/requirements.txt b/requirements.txt
index e8488f50..ffff37d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,5 @@ ptpython==2.0.4
feedparser==6.0.1
bleach==3.2.0
requests==2.24.0
+xtarfile==0.0.4
+zstandard==0.14.0