Merge branch 'fgets-perf'

This commit is contained in:
Dan McGee 2010-12-29 18:43:44 -06:00
commit a58083459b
7 changed files with 139 additions and 41 deletions

View File

@ -155,17 +155,22 @@ static struct pkg_operations *get_file_pkg_ops(void)
*/
static int parse_descfile(struct archive *a, pmpkg_t *newpkg)
{
char line[PATH_MAX];
char *ptr = NULL;
char *key = NULL;
int linenum = 0;
struct archive_read_buffer buf;
ALPM_LOG_FUNC;
/* loop until we reach EOF (where archive_fgets will return NULL) */
while(_alpm_archive_fgets(line, PATH_MAX, a) != NULL) {
memset(&buf, 0, sizeof(buf));
/* 512K for a line length seems reasonable */
buf.max_line_size = 512 * 1024;
/* loop until we reach EOF or other error */
while(_alpm_archive_fgets(a, &buf) == ARCHIVE_OK) {
char *line = _alpm_strtrim(buf.line);
linenum++;
_alpm_strtrim(line);
if(strlen(line) == 0 || line[0] == '#') {
continue;
}

View File

@ -219,8 +219,8 @@ static int sync_db_populate(pmdb_t *db)
}
/*
 * READ_NEXT(s): fetch the next line of the current archive entry and make it
 * available through `s`, with surrounding whitespace trimmed by
 * _alpm_strtrim().
 *
 * The macro is not self-contained: it expects `archive`, `buf` and an
 * `error:` label to exist in the scope where it is expanded, and it jumps to
 * that label when the read does not succeed.
 *
 * NOTE(review): two call forms of _alpm_archive_fgets() appear below. Only the
 * (archive, &buf) form matches the int-returning prototype; the
 * (s, sizeof(s), archive) form looks like the removed side of the diff this
 * text was taken from -- confirm against the real file before relying on it.
 */
#define READ_NEXT(s) do { \
if(_alpm_archive_fgets(s, sizeof(s), archive) == NULL) goto error; \
_alpm_strtrim(s); \
if(_alpm_archive_fgets(archive, &buf) != ARCHIVE_OK) goto error; \
s = _alpm_strtrim(buf.line); \
} while(0)
#define READ_AND_STORE(f) do { \
@ -238,10 +238,10 @@ static int sync_db_populate(pmdb_t *db)
static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry)
{
char line[1024];
const char *entryname = NULL;
char *filename, *pkgname, *p, *q;
pmpkg_t *pkg;
struct archive_read_buffer buf;
ALPM_LOG_FUNC;
@ -260,6 +260,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr
_alpm_log(PM_LOG_FUNCTION, "loading package data from archive entry %s\n",
entryname);
memset(&buf, 0, sizeof(buf));
/* 512K for a line length seems reasonable */
buf.max_line_size = 512 * 1024;
/* get package and db file names */
STRDUP(pkgname, entryname, RET_ERR(PM_ERR_MEMORY, -1));
p = pkgname + strlen(pkgname);
@ -279,8 +283,9 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr
if(strcmp(filename, "desc") == 0 || strcmp(filename, "depends") == 0
|| strcmp(filename, "deltas") == 0) {
while(_alpm_archive_fgets(line, sizeof(line), archive) != NULL) {
_alpm_strtrim(line);
while(_alpm_archive_fgets(archive, &buf) == ARCHIVE_OK) {
char *line = _alpm_strtrim(buf.line);
if(strcmp(line, "%NAME%") == 0) {
READ_NEXT(line);
if(strcmp(line, pkg->name) != 0) {

View File

@ -771,33 +771,89 @@ int _alpm_test_md5sum(const char *filepath, const char *md5sum)
return(ret);
}
char *_alpm_archive_fgets(char *line, size_t size, struct archive *a)
/* Note: does NOT handle sparse files on purpose for speed. */
int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b)
{
/* for now, just read one char at a time until we get to a
* '\n' char. we can optimize this later with an internal
* buffer. */
/* leave room for zero terminator */
char *last = line + size - 1;
char *i;
char *i = NULL;
int64_t offset;
int done = 0;
for(i = line; i < last; i++) {
int ret = archive_read_data(a, i, 1);
/* special check for first read- if null, return null,
* this indicates EOF */
if(i == line && (ret <= 0 || *i == '\0')) {
return(NULL);
while(1) {
/* have we processed this entire block? */
if(b->block + b->block_size == b->block_offset) {
if(b->ret == ARCHIVE_EOF) {
/* reached end of archive on the last read, now we are out of data */
goto cleanup;
}
/* zero-copy - this is the entire next block of data. */
b->ret = archive_read_data_block(a, (void*)&b->block,
&b->block_size, &offset);
b->block_offset = b->block;
/* error or end of archive with no data read, cleanup */
if(b->ret < ARCHIVE_OK ||
(b->block_size == 0 && b->ret == ARCHIVE_EOF)) {
goto cleanup;
}
}
/* check if read value was null or newline */
if(ret <= 0 || *i == '\0' || *i == '\n') {
last = i + 1;
break;
/* loop through the block looking for EOL characters */
for(i = b->block_offset; i < (b->block + b->block_size); i++) {
/* check if read value was null or newline */
if(*i == '\0' || *i == '\n') {
done = 1;
break;
}
}
/* allocate our buffer, or ensure our existing one is big enough */
if(!b->line) {
/* set the initial buffer to the read block_size */
CALLOC(b->line, b->block_size + 1, sizeof(char),
RET_ERR(PM_ERR_MEMORY, -1));
b->line_size = b->block_size + 1;
b->line_offset = b->line;
} else {
size_t needed = (b->line_offset - b->line) + (i - b->block_offset) + 1;
if(needed > b->max_line_size) {
RET_ERR(PM_ERR_MEMORY, -1);
}
if(needed > b->line_size) {
/* need to realloc + copy data to fit total length */
char *new;
CALLOC(new, needed, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1));
memcpy(new, b->line, b->line_size);
b->line_size = needed;
b->line_offset = new + (b->line_offset - b->line);
free(b->line);
b->line = new;
}
}
if(done) {
size_t len = i - b->block_offset;
memcpy(b->line_offset, b->block_offset, len);
b->line_offset[len] = '\0';
b->block_offset = ++i;
/* this is the main return point; from here you can read b->line */
return(ARCHIVE_OK);
} else {
/* we've looked through the whole block but no newline, copy it */
size_t len = b->block + b->block_size - b->block_offset;
memcpy(b->line_offset, b->block_offset, len);
b->line_offset += len;
b->block_offset = i;
}
}
/* always null terminate the buffer */
*last = '\0';
return(line);
cleanup:
{
int ret = b->ret;
FREE(b->line);
memset(b, 0, sizeof(b));
return(ret);
}
}
int _alpm_splitname(const char *target, pmpkg_t *pkg)

View File

@ -59,6 +59,22 @@
_alpm_log(PM_LOG_DEBUG, "returning error %d from %s : %s\n", err, __func__, alpm_strerrorlast()); \
return(ret); } while(0)
/**
 * Used as a buffer/state holder for _alpm_archive_fgets().
 *
 * Callers zero-fill the struct, set max_line_size, and then pass the same
 * instance to every _alpm_archive_fgets() call made on one archive entry.
 * After a successful call the current line is read through `line`.
 */
struct archive_read_buffer {
/* heap buffer holding the line being assembled; allocated and freed by
 * _alpm_archive_fgets() */
char *line;
/* position inside `line` where the next chunk of data is copied */
char *line_offset;
/* number of bytes currently allocated for `line` */
size_t line_size;
/* upper bound on the size `line` may grow to; set by the caller */
size_t max_line_size;
/* start of the data block most recently returned by
 * archive_read_data_block() */
char *block;
/* first byte of `block` that has not been consumed yet */
char *block_offset;
/* length in bytes of `block` */
size_t block_size;
/* status code of the last archive_read_data_block() call */
int ret;
};
int _alpm_makepath(const char *path);
int _alpm_makepath_mode(const char *path, mode_t mode);
int _alpm_copyfile(const char *src, const char *dest);
@ -76,7 +92,7 @@ char *_alpm_filecache_find(const char *filename);
const char *_alpm_filecache_setup(void);
int _alpm_lstat(const char *path, struct stat *buf);
int _alpm_test_md5sum(const char *filepath, const char *md5sum);
char *_alpm_archive_fgets(char *line, size_t size, struct archive *a);
int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b);
int _alpm_splitname(const char *target, pmpkg_t *pkg);
unsigned long _alpm_hash_sdbm(const char *str);

View File

@ -29,7 +29,9 @@ def __init__(self, rule):
self.result = 0
def __str__(self):
return "rule = %s" % self.rule
if len(self.rule) <= 40:
return self.rule
return self.rule[:37] + '...'
def check(self, root, retcode, localdb, files):
"""
@ -76,6 +78,9 @@ def check(self, root, retcode, localdb, files):
elif case == "VERSION":
if value != newpkg.version:
success = 0
elif case == "DESC":
if value != newpkg.desc:
success = 0
elif case == "GROUPS":
if not value in newpkg.groups:
success = 0
@ -153,7 +158,4 @@ def check(self, root, retcode, localdb, files):
self.result = success
return success
if __name__ != "__main__":
rule = pmrule("PKG_EXIST=dummy")
# vim: set ts=4 sw=4 et:

View File

@ -260,11 +260,6 @@ def check(self):
self.result["fail"] += 1
else:
msg = "SKIP"
print "\t[%s] %s" % (msg, i.rule)
i.result = success
if __name__ == "__main__":
pass
print "\t[%s] %s" % (msg, i)
# vim: set ts=4 sw=4 et:

View File

@ -0,0 +1,19 @@
# pactest scenario: line-length cutoff when reading package descriptions.
# pkg1's description (500K) is below the cutoff, pkg2's (600K) is above it.
self.description = "Install packages with huge descriptions"

p1 = pmpkg("pkg1")
p1.desc = 'A' * 500 * 1024
self.addpkg(p1)

p2 = pmpkg("pkg2")
p2.desc = 'A' * 600 * 1024
self.addpkg(p2)

self.args = "-U %s %s" % (p1.filename(), p2.filename())

# Note that the current cutoff on line length is 512K, so the first package
# will succeed while the second one will fail to record the description.
self.addrule("PACMAN_RETCODE=0")
self.addrule("PKG_EXIST=pkg1")
self.addrule("PKG_DESC=pkg1|%s" % p1.desc)
# The second pair of rules must target pkg2; checking pkg1 against p2.desc
# passes trivially and never exercises the over-limit case.
self.addrule("PKG_EXIST=pkg2")
self.addrule("!PKG_DESC=pkg2|%s" % p2.desc)