source: rtems-source-builder/source-builder/sb/download.py @ 162cbda

5
Last change on this file since 162cbda was 4b3e0f8, checked in by Chris Johns <chrisj@…>, on 03/04/18 at 21:29:40

The libexpat project has moved to github. Fetch expat from github.

Close #3315

  • Property mode set to 100644
File size: 23.9 KB
Line 
1#
2# RTEMS Tools Project (http://www.rtems.org/)
3# Copyright 2010-2016 Chris Johns (chrisj@rtems.org)
4# All rights reserved.
5#
6# This file is part of the RTEMS Tools package in 'rtems-tools'.
7#
8# Permission to use, copy, modify, and/or distribute this software for any
9# purpose with or without fee is hereby granted, provided that the above
10# copyright notice and this permission notice appear in all copies.
11#
12# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
20#
21# This code builds a package given a config file. It only builds to be
22# installed, not to be packaged, unless you run a packager around this.
23#
24
25from __future__ import print_function
26
27import hashlib
28import os
29import re
30import stat
31import sys
32try:
33    import urllib.request as urllib_request
34    import urllib.parse as urllib_parse
35except ImportError:
36    import urllib2 as urllib_request
37    import urlparse as urllib_parse
38
39import cvs
40import error
41import git
42import log
43import path
44import sources
45import version
46
47def _do_download(opts):
48    download = True
49    if opts.dry_run():
50        download = False
51        wa = opts.with_arg('download')
52        if wa is not None:
53            if wa[0] == 'with_download' and wa[1] == 'yes':
54                download = True
55    return download
56
57def _humanize_bytes(bytes, precision = 1):
58    abbrevs = (
59        (1 << 50, 'PB'),
60        (1 << 40, 'TB'),
61        (1 << 30, 'GB'),
62        (1 << 20, 'MB'),
63        (1 << 10, 'kB'),
64        (1, ' bytes')
65    )
66    if bytes == 1:
67        return '1 byte'
68    for factor, suffix in abbrevs:
69        if bytes >= factor:
70            break
71    return '%.*f%s' % (precision, float(bytes) / factor, suffix)
72
73def _sensible_url(url, used = 0):
74    space = 100
75    if len(url) > space:
76        size = int(space - 14)
77        url = url[:size] + '...<see log>'
78    return url
79
def _hash_check(file_, absfile, macros, remove = True):
    """Check a file against the hash recorded for it.

    The hash is looked up with sources.get_hash() using the lower case file
    name and must be of the form '<algorithm> <hex digest>'.

    file_   : file name used for the hash lookup and in messages.
    absfile : path of the file to read.
    macros  : macros passed to sources.get_hash().
    remove  : if True the file is deleted when the check fails.

    Returns True if the digest matches or if no hash is recorded (allowed
    only when the RSB is not released), and False on a read error or a
    digest mismatch.

    Raises error.internal for a badly formatted hash and error.general for
    an unknown or insecure (md5, sha1) algorithm, a missing hash in a
    released RSB, or a failure to remove the file.
    """
    hash_spec = sources.get_hash(file_.lower(), macros)
    if hash_spec is None:
        if version.released():
            raise error.general('%s: no hash found in released RSB' % (file_))
        log.warning('%s: no hash found' % (file_))
        return True
    hash_spec = hash_spec.split()
    if len(hash_spec) != 2:
        raise error.internal('invalid hash format: %s' % (file_))
    algorithm, expected = hash_spec
    try:
        # Only Python 2.7 has hashlib.algorithms.
        hashlib_algorithms = hashlib.algorithms
    except AttributeError:
        hashlib_algorithms = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
    if algorithm not in hashlib_algorithms:
        raise error.general('invalid hash algorithm for %s: %s' % (file_, algorithm))
    if algorithm in ['md5', 'sha1']:
        raise error.general('hash: %s: insecure: %s' % (file_, algorithm))
    failed = False
    digest = None
    _in = None
    try:
        hasher = hashlib.new(algorithm)
        _in = open(path.host(absfile), 'rb')
        #
        # Hash in chunks so a large source archive is never held in memory
        # in one piece.
        #
        while True:
            chunk = _in.read(1024 * 1024)
            if not chunk:
                break
            hasher.update(chunk)
        digest = hasher.hexdigest()
    except IOError as err:
        log.notice('hash: %s: read error: %s' % (file_, str(err)))
        failed = True
    except:
        msg = 'hash: %s: error' % (file_)
        log.stderr(msg)
        log.notice(msg)
        raise
    finally:
        if _in is not None:
            _in.close()
    #
    # Only report and compare a digest of the complete file. After a read
    # error there is nothing meaningful to compare.
    #
    if digest is not None:
        log.output('checksums: %s: %s => %s' % (file_, digest, expected))
        if digest != expected:
            log.warning('checksum error: %s' % (file_))
            failed = True
    if failed and remove:
        log.warning('removing: %s' % (file_))
        if path.exists(absfile):
            try:
                os.remove(path.host(absfile))
            except (IOError, OSError) as err:
                # os.remove() raises OSError which is not an IOError on
                # Python 2.
                raise error.general('hash: %s: remove: %s' % (absfile, str(err)))
            except:
                raise error.general('hash: %s: remove error' % (file_))
    return not failed
133
def _local_path(source, pathkey, config):
    """Locate source['file'] in the ':' separated directories of 'pathkey'.

    source['local'] and source['local_prefix'] are set to the first
    directory in which the file exists, and the file found is hash checked.
    If the file exists nowhere they are left at the first directory searched
    (provided source['local'] was None on entry).
    """
    for directory in config.define(pathkey).split(':'):
        prefix = path.abspath(directory)
        candidate = path.join(prefix, source['file'])
        found = path.exists(candidate)
        if found or source['local'] is None:
            source['local_prefix'] = prefix
            source['local'] = candidate
        if found:
            _hash_check(source['file'], candidate, config.macros)
            break
146
def _http_parser(source, pathkey, config, opts):
    """Complete the 'source' dictionary for an http or ftp URL.

    Unless the file name was overridden, a name is derived for gitweb.cgi
    patch requests, every run of characters other than letters, digits, '.'
    and '-' is replaced with '-', and names longer than 127 characters are
    rejected with error.general. The local path is then resolved and the
    decompression command is selected from the last part of source['ext'].
    """
    if 'file-override' not in source['options']:
        url = source['url']
        #
        # Hack for gitweb.cgi patch downloads. The request should carry
        #    p=<what>
        #    a=patch
        #    h=<hash>
        # and the local name is built from the p and h parts.
        #
        if 'gitweb.cgi' in url:
            if '?' not in url:
                raise error.general('invalid gitweb.cgi request: %s' % (url))
            request = url.split('?')[1]
            if not request:
                raise error.general('invalid gitweb.cgi request: %s' % (url))
            project = None
            commit = None
            for field in request.split(';'):
                if '=' not in field:
                    raise error.general('invalid gitweb.cgi path: %s' % (url))
                parts = field.split('=')
                if parts[0] == 'p':
                    project = parts[1].replace('.', '-')
                elif parts[0] == 'h':
                    commit = parts[1]
            if project is None or commit is None:
                raise error.general('gitweb.cgi path missing p or h: %s' % (url))
            source['file'] = '%s-%s.patch' % (project, commit)
        #
        # Wipe out everything special in the file name.
        #
        source['file'] = re.sub(r'[^a-zA-Z0-9.\-]+', '-', source['file'])
        max_file_len = 127
        if len(source['file']) > max_file_len:
            raise error.general('file name length is greater than %i (maybe use --rsb-file=FILE option): %s' % \
                                (max_file_len, source['file']))
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Is the file compressed ? The final extension selects the tool.
    #
    decompressors = {
        'gz':  ('gzip',  '%{__gzip} -dc'),
        'bz2': ('bzip2', '%{__bzip2} -dc'),
        'zip': ('zip',   '%{__unzip} -u'),
        'xz':  ('xz',    '%{__xz} -dc')
    }
    last_ext = source['ext'].split('.')[-1]
    if last_ext in decompressors:
        source['compressed-type'], source['compressed'] = decompressors[last_ext]
213
def _patchworks_parser(source, pathkey, config, opts):
    """Complete the 'source' dictionary for a 'pw' (patchworks) URL.

    The local path is resolved and source['url'] is rebuilt from
    source['path'] with its first two characters replaced by 'http'.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    remainder = source['path'][2:]
    source['url'] = 'http' + remainder
220
def _git_parser(source, pathkey, config, opts):
    """Complete the 'source' dictionary for a git URL.

    The URL is split on '?'. The first part provides the path, file, name
    and extension and any remaining parts are kept in source['args']. The
    local path and the symlink are both set to
    <local_prefix>/git/<file>.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    url_parts = source['url'].split('?')
    repo_url = url_parts[0]
    source['path'] = path.dirname(repo_url)
    source['file'] = path.basename(repo_url)
    source['name'], source['ext'] = path.splitext(source['file'])
    if len(url_parts) > 1:
        source['args'] = url_parts[1:]
    #
    # Symlink.
    #
    clone_path = path.join(source['local_prefix'], 'git', source['file'])
    source['local'] = clone_path
    source['symlink'] = clone_path
238
def _cvs_parser(source, pathkey, config, opts):
    """Complete the 'source' dictionary for a cvs:// URL.

    The URL is split on '?'. The first part gives the CVS root and the base
    of the local name; the remaining parts may carry the options module=,
    src-prefix=, tag= and date=. Other options are ignored here. The local
    name has the module, tag and date appended in that fixed order and the
    characters '/@#%.-' replaced with '_'.

    Raises error.general if the URL is not a valid cvs URL, an option does
    not have exactly one '=', or both a date and a tag are given.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    if not source['url'].startswith('cvs://'):
        raise error.general('invalid cvs path: %s' % (source['url']))
    url_parts = source['url'].split('?')
    try:
        url = url_parts[0]
        after_scheme = url[6:]
        source['file'] = url[after_scheme.index(':') + 7:]
        source['cvsroot'] = ':%s:' % (url[6:after_scheme.index('/') + 6:])
    except:
        raise error.general('invalid cvs path: %s' % (source['url']))
    #
    # URL option name -> (source key, error message for a bad option).
    #
    options = {
        'module':     ('module',     'invalid cvs module: %s'),
        'src-prefix': ('src_prefix', 'invalid cvs src-prefix: %s'),
        'tag':        ('tag',        'invalid cvs tag: %s'),
        'date':       ('date',       'invalid cvs date: %s')
    }
    for arg in url_parts[1:]:
        fields = arg.split('=')
        if fields[0] not in options:
            continue
        key, bad_option = options[fields[0]]
        if len(fields) != 2:
            raise error.general(bad_option % (arg))
        source[key] = fields[1]
    if 'date' in source and 'tag' in source:
        raise error.general('cvs URL cannot have a date and tag: %s' % (source['url']))
    # Do here to ensure an ordered path, the URL can include options in any order
    for key in ('module', 'tag', 'date'):
        if key in source:
            source['file'] += '_%s' % (source[key])
    for special in '/@#%.-':
        source['file'] = source['file'].replace(special, '_')
    #
    # Symlink.
    #
    source['local'] = path.join(source['local_prefix'], 'cvs', source['file'])
    source['symlink'] = source['local']
    if 'src_prefix' in source:
        source['symlink'] = path.join(source['local'], source['src_prefix'])
290
def _file_parser(source, pathkey, config, opts):
    """Complete the 'source' dictionary for a file URL.

    The local path is resolved using the file name already in the
    dictionary, then source['file'] is replaced with the URL from its
    seventh character onwards.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Get the paths sorted.
    #
    # NOTE(review): 'file://' is seven characters long so this slice keeps
    # the final '/' of the prefix; _file_downloader strips seven. Confirm
    # which one is intended.
    #
    url = source['url']
    source['file'] = url[6:]
300
#
# URL prefix to parser. parse_url() runs the parser of every prefix the URL
# starts with, so 'http' also handles 'https' URLs.
#
parsers = {
    'http': _http_parser,
    'ftp': _http_parser,
    'pw': _patchworks_parser,
    'git': _git_parser,
    'cvs': _cvs_parser,
    'file': _file_parser
}
307
def set_release_path(release_path, macros):
    """Define the 'release_path' macro.

    When release_path is None the default
    '%{rtems_release_url}/%{rsb_version}/sources' is used.
    """
    default_path = '%{rtems_release_url}/%{rsb_version}/sources'
    chosen = default_path if release_path is None else release_path
    macros.define('release_path', chosen)
312
def parse_url(url, pathkey, config, opts, file_override = None):
    """Split a URL into the 'source' dictionary used by the downloaders.

    url           : the URL, which must have the form <protocol>://<rest>.
    pathkey       : name of the macro holding the ':' separated local search
                    path, handed to the protocol parser.
    config        : configuration handed to the protocol parser.
    opts          : options handed to the protocol parser.
    file_override : optional local file name used in place of the base name
                    of the URL. It must not contain '/', '\\', '?' or '*'.

    Returns a dictionary with at least the keys 'url', 'options', 'path',
    'file', 'name', 'ext', 'local' and 'script'. The parser of each matching
    protocol adds its own keys and 'type' is set to the matching prefix.

    Raises error.general for a URL without a protocol prefix or a file
    override that contains a bad character.
    """
    #
    # Split the source up into the parts we need.
    #
    source = {}
    source['url'] = url
    source['options'] = []
    colon = url.find(':')
    #
    # find() returns -1 when there is no ':' at all. Without the explicit
    # test a URL starting with '//' would pass the '//' check below.
    #
    if colon <= 0 or url[colon + 1:colon + 3] != '//':
        raise error.general('malformed URL (no protocol prefix): %s' % (url))
    source['path'] = url[:colon + 3] + path.dirname(url[colon + 3:])
    if file_override is None:
        source['file'] = path.basename(url)
    else:
        bad_chars = [c for c in ['/', '\\', '?', '*'] if c in file_override]
        if len(bad_chars) > 0:
            raise error.general('bad characters in file name: %s' % (file_override))
        log.output('download: file-override: %s' % (file_override))
        source['file'] = file_override
        source['options'] += ['file-override']
    source['name'], source['ext'] = path.splitext(source['file'])
    #
    # Treat a '.tar.<compression>' double extension as a single extension.
    #
    if source['name'].endswith('.tar'):
        source['name'] = source['name'][:-4]
        source['ext'] = '.tar' + source['ext']
    #
    # Get the file. Checks the local source directory first. The parsers in
    # this file do not return a value so the break is never taken and every
    # prefix that matches the URL is run.
    #
    source['local'] = None
    for p in parsers:
        if url.startswith(p):
            source['type'] = p
            if parsers[p](source, pathkey, config, opts):
                break
    source['script'] = ''
    return source
348
def _http_downloader(url, local, config, opts):
    """Download 'url' to the file 'local' and check its hash.

    url    : the URL to fetch. URLs on https://api.github.com have
             'tarball/%{version}' (expanded by config) joined on.
    local  : path of the file to create.
    config : provides expand() and the macros used for the hash check.
    opts   : options used to decide if downloading is enabled.

    Returns True if 'local' already exists, if downloading is disabled, or
    if the download and hash check succeed. Returns False if the transfer
    fails with an IOError or ValueError. Any other exception is logged and
    raised again. A partly written file is removed in every failure case
    because a file that exists is accepted as complete on the next call.

    Raises error.general if the result is not a file or its hash check
    fails.
    """
    if path.exists(local):
        return True
    #
    # Hack for GitHub.
    #
    if url.startswith('https://api.github.com'):
        url = urllib_parse.urljoin(url, config.expand('tarball/%{version}'))
    dst = os.path.relpath(path.host(local))
    log.output('download: (full) %s -> %s' % (url, dst))
    log.notice('download: %s -> %s' % (_sensible_url(url, len(dst)), dst))
    if not _do_download(opts):
        return True
    failed = False
    _in = None
    _out = None
    _length = None
    _have = 0
    _chunk_size = 256 * 1024
    _last_msg = ''
    _have_status_output = False
    _complete = False
    _url = url
    try:
        try:
            # See #2656
            _req = urllib_request.Request(_url)
            _req.add_header('User-Agent', 'Wget/1.16.3 (freebsd10.1)')
            #
            # NOTE(review): certificate verification is switched off on
            # purpose here; the hash check after the download is what
            # validates the file.
            #
            _ssl_context = None
            try:
                import ssl
                _ssl_context = ssl._create_unverified_context()
            except (ImportError, AttributeError):
                log.output('download: no ssl context')
            #
            # Only fall back to a plain urlopen() when the context cannot
            # be used. A network or HTTP error must not be mistaken for a
            # missing ssl context and sent a second time.
            #
            if _ssl_context is None:
                _in = urllib_request.urlopen(_req)
            else:
                try:
                    _in = urllib_request.urlopen(_req, context = _ssl_context)
                except TypeError:
                    # This urlopen() does not take a 'context' argument.
                    log.output('download: no ssl context')
                    _in = urllib_request.urlopen(_req)
            if _url != _in.geturl():
                _url = _in.geturl()
                log.output(' redirect: %s' % (_url))
                log.notice(' redirect: %s' % (_sensible_url(_url)))
            _out = open(path.host(local), 'wb')
            try:
                _length = int(_in.info()['Content-Length'].strip())
            except Exception:
                # No usable Content-Length, report only the bytes received.
                pass
            while True:
                _msg = '\rdownloading: %s - %s ' % (dst, _humanize_bytes(_have))
                if _length:
                    _percent = round((float(_have) / _length) * 100, 2)
                    _msg += 'of %s (%0.0f%%) ' % (_humanize_bytes(_length), _percent)
                if _msg != _last_msg:
                    # Blank out what is left of a longer previous message.
                    extras = (len(_last_msg) - len(_msg))
                    log.stdout_raw('%s%s' % (_msg, ' ' * extras + '\b' * extras))
                    _last_msg = _msg
                    _have_status_output = True
                _chunk = _in.read(_chunk_size)
                if not _chunk:
                    break
                _out.write(_chunk)
                _have += len(_chunk)
            log.stdout_raw('\n\r')
            _complete = True
        except:
            # Finish the status line before anything else is printed.
            if _have_status_output:
                log.stdout_raw('\n\r')
            raise
    except (IOError, ValueError) as err:
        log.notice('download: %s: error: %s' % (_sensible_url(_url), str(err)))
        failed = True
    except:
        msg = 'download: %s: error' % (_sensible_url(_url))
        log.stderr(msg)
        log.notice(msg)
        raise
    finally:
        #
        # Close before removing, an open file cannot be removed on Windows.
        #
        try:
            if _out is not None:
                _out.close()
        finally:
            if _in is not None:
                _in.close()
        if not _complete and path.exists(local):
            os.remove(path.host(local))
    if not failed:
        if not path.isfile(local):
            raise error.general('source is not a file: %s' % (path.host(local)))
        if not _hash_check(path.basename(local), local, config.macros, False):
            raise error.general('checksum failure file: %s' % (dst))
    return not failed
448
def _git_downloader(url, local, config, opts):
    """Clone or refresh the git repository 'local' from a git URL.

    The URL has the form git://<path>?<option>?<option>... The protocol=
    option replaces the leading 'git' of the URL ('none' removes the
    protocol header completely). A repository that is not valid is cloned;
    a valid one is cleaned, hard reset and has 'master' checked out. The
    options branch=, checkout=, submodule=, fetch, merge, pull and reset
    are then applied in the order given.

    Always returns True. Raises error.general for an unknown option or
    protocol, or an option with a bad format.
    """
    repo = git.repo(local, opts, config.macros)
    rlp = os.path.relpath(path.host(local))
    us = url.split('?')
    #
    # Handle the various git protocols.
    #
    # remove 'git' from 'git://xxxx/xxxx?protocol=...'
    #
    url_base = us[0][len('git'):]
    for option in us[1:]:
        fields = option.split('=')
        if fields[0] != 'protocol':
            continue
        if len(fields) != 2:
            raise error.general('invalid git protocol option: %s' % (fields))
        protocol = fields[1]
        if protocol == 'none':
            # remove the rest of the protocol header leaving nothing.
            us[0] = url_base[len('://'):]
            continue
        if protocol not in ['ssh', 'git', 'http', 'https', 'ftp', 'ftps', 'rsync']:
            raise error.general('unknown git protocol: %s' % (protocol))
        us[0] = protocol + url_base
    if repo.valid():
        repo.clean(['-f', '-d'])
        repo.reset('--hard')
        repo.checkout('master')
    else:
        log.notice('git: clone: %s -> %s' % (us[0], rlp))
        if _do_download(opts):
            repo.clone(us[0], local)
    for option in us[1:]:
        fields = option.split('=')
        name = fields[0]
        if name in ('branch', 'checkout'):
            if len(fields) != 2:
                raise error.general('invalid git branch/checkout: %s' % (fields))
            log.notice('git: checkout: %s => %s' % (us[0], fields[1]))
            if _do_download(opts):
                repo.checkout(fields[1])
        elif name == 'submodule':
            if len(fields) != 2:
                raise error.general('invalid git submodule: %s' % (fields))
            log.notice('git: submodule: %s <= %s' % (us[0], fields[1]))
            if _do_download(opts):
                repo.submodule(fields[1])
        elif name == 'fetch':
            log.notice('git: fetch: %s -> %s' % (us[0], rlp))
            if _do_download(opts):
                repo.fetch()
        elif name == 'merge':
            log.notice('git: merge: %s' % (us[0]))
            if _do_download(opts):
                repo.merge()
        elif name == 'pull':
            log.notice('git: pull: %s' % (us[0]))
            if _do_download(opts):
                repo.pull()
        elif name == 'reset':
            reset_args = []
            if len(fields) > 1:
                reset_args = ['--%s' % (fields[1])]
            log.notice('git: reset: %s' % (us[0]))
            if _do_download(opts):
                repo.reset(reset_args)
        elif name == 'protocol':
            # Handled before the clone.
            pass
        else:
            raise error.general('invalid git option: %s' % (fields))
    return True
517
def _cvs_downloader(url, local, config, opts):
    """Check out or refresh the CVS working directory 'local'.

    The URL has the form cvs://<root>?<option>?<option>... The options
    module=, src-prefix=, tag= and date= configure the checkout and the
    options update and reset are applied afterwards in the order given.

    Always returns True. Raises error.general if a configuration option
    does not have exactly one '='.
    """
    rlp = os.path.relpath(path.host(local))
    us = url.split('?')
    settings = {
        'module': None,
        'src-prefix': None,
        'tag': None,
        'date': None
    }
    bad_option = {
        'module': 'invalid cvs module: %s',
        'src-prefix': 'invalid cvs src-prefix: %s',
        'tag': 'invalid cvs tag: %s',
        'date': 'invalid cvs date: %s'
    }
    for option in us[1:]:
        fields = option.split('=')
        if fields[0] in settings:
            if len(fields) != 2:
                raise error.general(bad_option[fields[0]] % (option))
            settings[fields[0]] = fields[1]
    repo = cvs.repo(local, opts, config.macros, settings['src-prefix'])
    #
    # NOTE(review): the checkout only happens when the directory does not
    # exist; an existing directory that is not a valid repository is left
    # alone. Confirm this is intended.
    #
    if not repo.valid() and not path.isdir(local):
        log.notice('Creating source directory: %s' % \
                       (os.path.relpath(path.host(local))))
        if _do_download(opts):
            path.mkdir(local)
        log.notice('cvs: checkout: %s -> %s' % (us[0], rlp))
        if _do_download(opts):
            repo.checkout(':%s' % (us[0][6:]),
                          settings['module'], settings['tag'], settings['date'])
    for option in us[1:]:
        action = option.split('=')[0]
        if action == 'update':
            log.notice('cvs: update: %s' % (us[0]))
            if _do_download(opts):
                repo.update()
        elif action == 'reset':
            log.notice('cvs: reset: %s' % (us[0]))
            if _do_download(opts):
                repo.reset()
    return True
564
def _file_downloader(url, local, config, opts):
    """Copy the file named by a file URL to 'local'.

    Nothing is done if 'local' already exists. The source path is the URL
    with its first seven characters (the 'file://' prefix) removed.

    Returns True if 'local' exists or the copy succeeds and False if the
    copy fails. The reason for a failure is logged. Exceptions that are not
    derived from Exception, KeyboardInterrupt for example, are not caught.
    """
    if path.exists(local):
        return True
    src = url[7:]
    dst = local
    try:
        log.notice('download: copy %s -> %s' % (src, dst))
        path.copy(src, dst)
    except Exception as err:
        log.notice('download: copy %s -> %s: error: %s' % (src, dst, str(err)))
        return False
    return True
575
#
# URL prefix to downloader. get_file() tries the downloader of every prefix
# the URL starts with, so 'http' also handles 'https' URLs.
#
downloaders = {
    'http': _http_downloader,
    'ftp': _http_downloader,
    'pw': _http_downloader,
    'git': _git_downloader,
    'cvs': _cvs_downloader,
    'file': _file_downloader
}
582
def get_file(url, local, opts, config):
    """Fetch 'url' into 'local', trying alternative base URLs first.

    url    : the URL of the source or patch; white space separates
             alternative URLs.
    local  : the local path to download to.
    opts   : the options; provides the command line URLs, the release-url
             with-argument and the download controls.
    config : the configuration; provides the macros.

    The candidates are the file joined onto each base URL from the command
    line, then onto the release URL if one is in use, followed by the
    requested URL itself. The first downloader that succeeds ends the
    search.

    Raises error.general if 'local' is None, if the source is missing and
    downloading is disabled, if a release URL is requested and none is
    valid, or if every candidate fails while downloading is enabled.
    """
    if local is None:
        raise error.general('source/patch path invalid')
    if not path.isdir(path.dirname(local)) and not opts.download_disabled():
        log.notice('Creating source directory: %s' % \
                       (os.path.relpath(path.host(path.dirname(local)))))
    log.output('making dir: %s' % (path.host(path.dirname(local))))
    if _do_download(opts):
        path.mkdir(path.dirname(local))
    if not path.exists(local) and opts.download_disabled():
        raise error.general('source not found: %s' % (path.host(local)))
    #
    # Check if a URL has been provided on the command line. If the package is
    # released the RTEMS release URL is added after the command line URLs
    # unless overridden by the command line option --with-release-url. The
    # variant --without-release-url can override the released check.
    #
    url_bases = opts.urls()
    if url_bases is not None:
        # Work on a copy, the list belongs to the options.
        url_bases = list(url_bases)
    try:
        rtems_release_url_value = config.macros.expand('%{release_path}')
    except Exception:
        rtems_release_url_value = None
    rtems_release_url = None
    if version.released() and rtems_release_url_value:
        rtems_release_url = rtems_release_url_value
    with_rel_url = opts.with_arg('release-url')
    if with_rel_url[1] == 'not-found':
        if config.defined('without_release_url'):
            with_rel_url = ('without_release-url', 'yes')
    if with_rel_url[0] == 'with_release-url':
        if with_rel_url[1] == 'yes':
            if rtems_release_url_value is None:
                raise error.general('no valid release URL')
            rtems_release_url = rtems_release_url_value
        elif with_rel_url[1] == 'no':
            pass
        else:
            rtems_release_url = with_rel_url[1]
    elif with_rel_url[0] == 'without_release-url' and with_rel_url[1] == 'yes':
        rtems_release_url = None
    if rtems_release_url is not None:
        log.trace('release url: %s' % (rtems_release_url))
        #
        # If the URL being fetched is under the release path do not add the
        # sources release path because it is already there.
        #
        if not url.startswith(rtems_release_url):
            if url_bases is None:
                url_bases = [rtems_release_url]
            else:
                url_bases.append(rtems_release_url)
    urls = []
    if url_bases is not None:
        #
        # Split up the URL we are being asked to download.
        #
        url_path = urllib_parse.urlsplit(url)[2]
        slash = url_path.rfind('/')
        if slash < 0:
            url_file = url_path
        else:
            url_file = url_path[slash + 1:]
        log.trace('url_file: %s' %(url_file))
        for base in url_bases:
            #
            # Hack to fix #3064 where --rsb-file is being used. This code is a
            # mess and should be refactored. The local file name is used for
            # the release URL only; the other bases keep the URL's file name.
            #
            base_file = url_file
            if version.released() and base == rtems_release_url:
                base_file = path.basename(local)
            if base[-1:] != '/':
                base += '/'
            next_url = urllib_parse.urljoin(base, base_file)
            log.trace('url: %s' %(next_url))
            urls.append(next_url)
    urls += url.split()
    log.trace('_url: %s -> %s' % (','.join(urls), local))
    for candidate in urls:
        for dl in downloaders:
            if candidate.startswith(dl):
                if downloaders[dl](candidate, local, config, opts):
                    return
    if _do_download(opts):
        raise error.general('downloading %s: all paths have failed, giving up' % (url))
Note: See TracBrowser for help on using the repository browser.