You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
422 lines
15 KiB
422 lines
15 KiB
#!/home/patrickd/swarms/swarmsenv/bin/python3.12
|
|
# Parallel Python Software: http://www.parallelpython.com
|
|
# Copyright (c) 2005-2012 Vitalii Vanovschi.
|
|
# Copyright (c) 2015-2016 California Institute of Technology.
|
|
# Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
|
|
# All rights reserved.
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# * Neither the name of the author nor the names of its contributors
|
|
# may be used to endorse or promote products derived from this software
|
|
# without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
# THE POSSIBILITY OF SUCH DAMAGE.
|
|
"""
|
|
ppft server: the parallel python network server
|
|
"""
|
|
import atexit
|
|
import logging
|
|
import errno
|
|
import getopt
|
|
import sys
|
|
import socket
|
|
import threading
|
|
import random
|
|
import string
|
|
import signal
|
|
import time
|
|
import os
|
|
|
|
import ppft as pp
|
|
import ppft.auto as ppauto
|
|
import ppft.common as ppc
|
|
import ppft.transport as pptransport
|
|
|
|
copyright = ppc.copyright
|
|
__version__ = version = ppc.__version__
|
|
|
|
LISTEN_SOCKET_TIMEOUT = 20
|
|
|
|
# compatibility with Jython
|
|
STAT_SIGNAL = 'SIGUSR1' if 'java' not in sys.platform else 'SIGUSR2'
|
|
|
|
import hashlib
|
|
sha_new = hashlib.sha1
|
|
|
|
|
|
class _NetworkServer(pp.Server):
|
|
"""Network Server Class
|
|
"""
|
|
|
|
def __init__(self, ncpus="autodetect", interface="0.0.0.0",
|
|
broadcast="255.255.255.255", port=None, secret=None,
|
|
timeout=None, restart=False, proto=2, socket_timeout=3600, pid_file=None):
|
|
pp.Server.__init__(self, ncpus, (), secret, restart,
|
|
proto, socket_timeout)
|
|
if pid_file:
|
|
with open(pid_file, 'w') as pfile:
|
|
print(os.getpid(), file=pfile)
|
|
atexit.register(os.remove, pid_file)
|
|
self.host = interface
|
|
self.bcast = broadcast
|
|
if port is not None:
|
|
self.port = port
|
|
else:
|
|
self.port = ppc.randomport()
|
|
self.timeout = timeout
|
|
self.ncon = 0
|
|
self.last_con_time = time.time()
|
|
self.ncon_lock = threading.Lock()
|
|
|
|
self.logger.debug("Starting network server interface=%s port=%i"
|
|
% (self.host, self.port))
|
|
if self.timeout is not None:
|
|
self.logger.debug("ppserver will exit in %i seconds if no "\
|
|
"connections with clients exist" % (self.timeout))
|
|
ppc.start_thread("timeout_check", self.check_timeout)
|
|
|
|
def ncon_add(self, val):
|
|
"""Keeps track of the number of connections and time of the last one"""
|
|
self.ncon_lock.acquire()
|
|
self.ncon += val
|
|
self.last_con_time = time.time()
|
|
self.ncon_lock.release()
|
|
|
|
def check_timeout(self):
|
|
"""Checks if timeout happened and shutdowns server if it did"""
|
|
while True:
|
|
if self.ncon == 0:
|
|
idle_time = time.time() - self.last_con_time
|
|
if idle_time < self.timeout:
|
|
time.sleep(self.timeout - idle_time)
|
|
else:
|
|
self.logger.debug("exiting ppserver due to timeout (no client"\
|
|
" connections in last %i sec)", self.timeout)
|
|
os._exit(0)
|
|
else:
|
|
time.sleep(self.timeout)
|
|
|
|
def listen(self):
|
|
"""Initiates listenting to incoming connections"""
|
|
try:
|
|
self.ssocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
# following allows ppserver to restart faster on the same port
|
|
self.ssocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
self.ssocket.settimeout(LISTEN_SOCKET_TIMEOUT)
|
|
self.ssocket.bind((self.host, self.port))
|
|
self.ssocket.listen(5)
|
|
except socket.error:
|
|
e = sys.exc_info()[1]
|
|
self.logger.error("Cannot create socket for %s:%s, %s", self.host, self.port, e)
|
|
|
|
try:
|
|
while 1:
|
|
csocket = None
|
|
# accept connections from outside
|
|
try:
|
|
(csocket, address) = self.ssocket.accept()
|
|
except socket.timeout:
|
|
pass
|
|
# don't exit on an interupt due to a signal
|
|
except socket.error:
|
|
e = sys.exc_info()[1]
|
|
if e.errno == errno.EINTR:
|
|
pass
|
|
if self._exiting:
|
|
return
|
|
# now do something with the clientsocket
|
|
# in this case, we'll pretend this is a threaded server
|
|
if csocket:
|
|
ppc.start_thread("client_socket", self.crun, (csocket, ))
|
|
except KeyboardInterrupt:
|
|
pass
|
|
except:
|
|
self.logger.debug("Exception in listen method (possibly expected)", exc_info=True)
|
|
finally:
|
|
self.logger.debug("Closing server socket")
|
|
self.ssocket.close()
|
|
|
|
def crun(self, csocket):
|
|
"""Authenticates client and handles its jobs"""
|
|
mysocket = pptransport.CSocketTransport(csocket, self.socket_timeout)
|
|
#send PP version
|
|
mysocket.send(version)
|
|
#generate a random string
|
|
srandom = "".join([random.choice(string.ascii_letters)
|
|
for i in range(16)])
|
|
mysocket.send(srandom)
|
|
answer = sha_new(ppc.b_(srandom+self.secret)).hexdigest()
|
|
clientanswer = ppc.str_(mysocket.receive())
|
|
if answer != clientanswer:
|
|
self.logger.warning("Authentication failed, client host=%s, port=%i"
|
|
% csocket.getpeername())
|
|
mysocket.send("FAILED")
|
|
csocket.close()
|
|
return
|
|
else:
|
|
mysocket.send("OK")
|
|
|
|
ctype = ppc.str_(mysocket.receive())
|
|
self.logger.debug("Control message received: " + ctype)
|
|
self.ncon_add(1)
|
|
try:
|
|
if ctype == "STAT":
|
|
#reset time at each new connection
|
|
self.get_stats()["local"].time = 0.0
|
|
#open('/tmp/pp.debug', 'a+').write('STAT: \n')
|
|
mysocket.send(str(self.get_ncpus()))
|
|
#open('/tmp/pp.debug', 'a+').write('STAT: get_ncpus\n')
|
|
while 1:
|
|
mysocket.receive()
|
|
#open('/tmp/pp.debug', 'a+').write('STAT: recvd\n')
|
|
mysocket.send(str(self.get_stats()["local"].time))
|
|
#open('/tmp/pp.debug', 'a+').write('STAT: _\n')
|
|
elif ctype=="EXEC":
|
|
while 1:
|
|
#open('/tmp/pp.debug', 'a+').write('EXEC: \n')
|
|
sfunc = mysocket.creceive()
|
|
#open('/tmp/pp.debug', 'a+').write('EXEC: '+repr((sfunc,))+'\n')
|
|
sargs = mysocket.receive()
|
|
#open('/tmp/pp.debug', 'a+').write('EXEC: '+repr((sargs,))+'\n')
|
|
fun = self.insert(sfunc, sargs)
|
|
sresult = fun(True)
|
|
#open('/tmp/pp.debug', 'a+').write('EXEC: '+repr((sresult,))+'\n')
|
|
mysocket.send(sresult)
|
|
#open('/tmp/pp.debug', 'a+').write('EXEC: _\n')
|
|
except:
|
|
if self._exiting:
|
|
return
|
|
if pp.SHOW_EXPECTED_EXCEPTIONS:
|
|
self.logger.debug("Exception in crun method (possibly expected)", exc_info=True)
|
|
self.logger.debug("Closing client socket")
|
|
csocket.close()
|
|
self.ncon_add(-1)
|
|
|
|
def broadcast(self):
|
|
"""Initiaates auto-discovery mechanism"""
|
|
discover = ppauto.Discover(self)
|
|
ppc.start_thread("server_broadcast", discover.run,
|
|
((self.host, self.port), (self.bcast, self.port)))
|
|
|
|
|
|
def parse_config(file_loc):
|
|
"""
|
|
Parses a config file in a very forgiving way.
|
|
"""
|
|
# If we don't have configobj installed then let the user know and exit
|
|
try:
|
|
from configobj import ConfigObj
|
|
except ImportError:
|
|
ie = sys.exc_info()[1]
|
|
#sysstderr = getattr(sys.stderr, 'buffer', sys.stderr)
|
|
print(("ERROR: You must have config obj installed to use"
|
|
"configuration files. You can still use command line switches."), file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
if not os.access(file_loc, os.F_OK):
|
|
print("ERROR: Can not access %s." % arg, file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
args = {}
|
|
autodiscovery = False
|
|
debug = False
|
|
|
|
# Load the configuration file
|
|
config = ConfigObj(file_loc)
|
|
# try each config item and use the result if it exists. If it doesn't
|
|
# then simply pass and move along
|
|
try:
|
|
args['secret'] = config['general'].get('secret')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
autodiscovery = config['network'].as_bool('autodiscovery')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['interface'] = config['network'].get('interface',
|
|
default="0.0.0.0")
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['broadcast'] = config['network'].get('broadcast')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['port'] = config['network'].as_int('port')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
debug = config['general'].as_bool('debug')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['ncpus'] = config['general'].as_int('workers')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['proto'] = config['general'].as_int('proto')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['restart'] = config['general'].as_bool('restart')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['timeout'] = config['network'].as_int('timeout')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['socket_timeout'] = config['network'].as_int('socket_timeout')
|
|
except:
|
|
pass
|
|
|
|
try:
|
|
args['pid_file'] = config['general'].get('pid_file')
|
|
except:
|
|
pass
|
|
# Return a tuple of the args dict and autodiscovery variable
|
|
return args, autodiscovery, debug
|
|
|
|
|
|
def print_usage():
|
|
"""Prints help"""
|
|
print("Parallel Python Network Server (pp-" + version + ")")
|
|
print("Usage: ppserver [-hdar] [-f format] [-n proto]"\
|
|
" [-c config_path] [-i interface] [-b broadcast]"\
|
|
" [-p port] [-w nworkers] [-s secret] [-t seconds]"\
|
|
" [-k seconds] [-P pid_file]")
|
|
print("")
|
|
print("Options: ")
|
|
print("-h : this help message")
|
|
print("-d : set log level to debug")
|
|
print("-f format : log format")
|
|
print("-a : enable auto-discovery service")
|
|
print("-r : restart worker process after each"\
|
|
" task completion")
|
|
print("-n proto : protocol number for pickle module")
|
|
print("-c path : path to config file")
|
|
print("-i interface : interface to listen")
|
|
print("-b broadcast : broadcast address for auto-discovery service")
|
|
print("-p port : port to listen")
|
|
print("-w nworkers : number of workers to start")
|
|
print("-s secret : secret for authentication")
|
|
print("-t seconds : timeout to exit if no connections with "\
|
|
"clients exist")
|
|
print("-k seconds : socket timeout in seconds")
|
|
print("-P pid_file : file to write PID to")
|
|
print("")
|
|
print("To print server stats send %s to its main process (unix only). " % STAT_SIGNAL)
|
|
print("")
|
|
print("Due to the security concerns always use a non-trivial secret key.")
|
|
print("Secret key set by -s switch will override secret key assigned by")
|
|
print("pp_secret variable in .pythonrc.py")
|
|
print("")
|
|
print("Please visit http://www.parallelpython.com for extended up-to-date")
|
|
print("documentation, examples and support forums")
|
|
|
|
|
|
def create_network_server(argv):
|
|
try:
|
|
opts, args = getopt.getopt(argv, "hdarn:c:b:i:p:w:s:t:f:k:P:", ["help"])
|
|
except getopt.GetoptError:
|
|
print_usage()
|
|
raise
|
|
|
|
args = {}
|
|
autodiscovery = False
|
|
debug = False
|
|
|
|
log_level = logging.WARNING
|
|
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
|
|
for opt, arg in opts:
|
|
if opt in ("-h", "--help"):
|
|
print_usage()
|
|
sys.exit()
|
|
elif opt == "-c":
|
|
args, autodiscovery, debug = parse_config(arg)
|
|
elif opt == "-d":
|
|
debug = True
|
|
elif opt == "-f":
|
|
log_format = arg
|
|
elif opt == "-i":
|
|
args["interface"] = arg
|
|
elif opt == "-s":
|
|
args["secret"] = arg
|
|
elif opt == "-p":
|
|
args["port"] = int(arg)
|
|
elif opt == "-w":
|
|
args["ncpus"] = int(arg)
|
|
elif opt == "-a":
|
|
autodiscovery = True
|
|
elif opt == "-r":
|
|
args["restart"] = True
|
|
elif opt == "-b":
|
|
args["broadcast"] = arg
|
|
elif opt == "-n":
|
|
args["proto"] = int(arg)
|
|
elif opt == "-t":
|
|
args["timeout"] = int(arg)
|
|
elif opt == "-k":
|
|
args["socket_timeout"] = int(arg)
|
|
elif opt == "-P":
|
|
args["pid_file"] = arg
|
|
|
|
if debug:
|
|
log_level = logging.DEBUG
|
|
pp.SHOW_EXPECTED_EXCEPTIONS = True
|
|
|
|
log_handler = logging.StreamHandler()
|
|
log_handler.setFormatter(logging.Formatter(log_format))
|
|
logging.getLogger("pp").setLevel(log_level)
|
|
logging.getLogger("pp").addHandler(log_handler)
|
|
|
|
server = _NetworkServer(**args)
|
|
if autodiscovery:
|
|
server.broadcast()
|
|
return server
|
|
|
|
def signal_handler(signum, stack):
|
|
"""Prints server stats when %s is received (unix only). """ % STAT_SIGNAL
|
|
server.print_stats()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
server = create_network_server(sys.argv[1:])
|
|
statsignal = getattr(signal, STAT_SIGNAL, None)
|
|
if statsignal:
|
|
signal.signal(statsignal, signal_handler)
|
|
server.listen()
|
|
#have to destroy it here explicitly otherwise an exception
|
|
#comes out in Python 2.4
|
|
del server
|
|
|
|
|
|
# Parallel Python Software: http://www.parallelpython.com
|