Improve collection parsing

pull/9/head
Thomas Sileo 2018-07-08 23:02:15 +02:00
rodzic b7d63f2302
commit 65c983e588
4 zmienionych plików z 58 dodań i 17 usunięć

Wyświetl plik

@ -12,6 +12,8 @@ from typing import Type
from typing import Union
from .backend import Backend
from .errors import ActivityNotFoundError
from .errors import ActivityUnavailableError
from .errors import BadActivityError
from .errors import DropActivityPreProcessError
from .errors import Error
@ -555,6 +557,10 @@ class BaseActivity(object, metaclass=_ActivityMeta):
except RemoteActivityGoneError:
logger.info(f"{recipient} is gone")
continue
except ActivityUnavailableError:
# TODO(tsileo): retry separately?
logger.info(f"failed {recipient} to fetch recipient")
continue
if actor.ACTIVITY_TYPE in ACTOR_TYPES:
if actor.endpoints:
@ -575,13 +581,12 @@ class BaseActivity(object, metaclass=_ActivityMeta):
continue
try:
col_actor = fetch_remote_activity(
item, expected=ActivityType.PERSON
)
except UnexpectedActivityTypeError:
logger.exception(f"failed to fetch actor {item!r}")
col_actor = fetch_remote_activity(item)
except ActivityUnavailableError:
# TODO(tsileo): retry separately?
logger.info(f"failed {recipient} to fetch recipient")
continue
except RemoteActivityGoneError:
except (RemoteActivityGoneError, ActivityNotFoundError):
logger.info(f"{item} is gone")
continue

Wyświetl plik

@ -1,5 +1,6 @@
import abc
import binascii
import json
import os
import typing
from typing import Any
@ -12,7 +13,9 @@ import requests
from .__version__ import __version__
from .collection import parse_collection
from .errors import ActivityNotFoundError
from .errors import ActivityUnavailableError
from .errors import RemoteActivityGoneError
from .urlutils import URLLookupFailedError
from .urlutils import check_url as check_url
if typing.TYPE_CHECKING:
@ -70,23 +73,45 @@ class Backend(abc.ABC):
pass # pragma: no cover
def fetch_iri(self, iri: str, **kwargs) -> "ap.ObjectType":  # pragma: no cover
    """Fetch the remote object at `iri` and return its decoded JSON body.

    Extra keyword arguments are forwarded to `requests.get`. A 15 second
    timeout is applied unless the caller supplies its own `timeout`.

    Raises:
        ActivityUnavailableError: the URL lookup failed, the connection
            failed or timed out, the server answered 500/502/503, or the
            response body is not valid JSON.
        ActivityNotFoundError: the server answered 404.
        RemoteActivityGoneError: the server answered 410.
        requests.exceptions.HTTPError: any other error status, raised by
            `raise_for_status()`.
    """
    try:
        self.check_url(iri)
    except URLLookupFailedError as err:
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, url lookup failed"
        ) from err

    # Use setdefault so a caller-provided timeout overrides the default
    # instead of colliding with it ("multiple values for keyword argument").
    kwargs.setdefault("timeout", 15)
    try:
        resp = requests.get(
            iri,
            headers={
                "User-Agent": self.user_agent(),
                "Accept": "application/activity+json",
            },
            **kwargs,
        )
    except (
        # ConnectTimeout/ReadTimeout are subclasses of these two.
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
    ) as err:
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, connection error"
        ) from err

    if resp.status_code == 404:
        raise ActivityNotFoundError(f"{iri} is not found")
    if resp.status_code == 410:
        raise RemoteActivityGoneError(f"{iri} is gone")
    if resp.status_code in (500, 502, 503):
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, server error ({resp.status_code})"
        )
    resp.raise_for_status()

    try:
        return resp.json()
    except ValueError as err:
        # ValueError covers json.JSONDecodeError as well as the
        # simplejson-based error some requests versions raise.
        # TODO(tsileo): a special error type?
        raise ActivityUnavailableError(f"{iri} is not JSON") from err
@abc.abstractmethod
def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool:

Wyświetl plik

@ -75,3 +75,9 @@ class RecursionLimitExceededError(BadActivityError):
class UnexpectedActivityTypeError(BadActivityError):
    """Raised when a different activity type than the received one was expected."""
class ActivityUnavailableError(ServerError):
    """Raised when a remote activity cannot be fetched.

    Covers failed URL lookups, connection errors and timeouts, 500/502/503
    responses from the remote server, and response bodies that are not JSON.
    """

    # NOTE(review): presumably the HTTP status reported for this error;
    # confirm against how ServerError uses `status_code`.
    status_code = 503

Wyświetl plik

@ -4,6 +4,7 @@ import socket
from typing import Dict
from urllib.parse import urlparse
from .errors import Error
from .errors import ServerError
logger = logging.getLogger(__name__)
@ -16,6 +17,10 @@ class InvalidURLError(ServerError):
pass
class URLLookupFailedError(Error):
    """Raised when the lookup of a URL fails (e.g. on `socket.gaierror`)."""
def is_url_valid(url: str, debug: bool = False) -> bool:
parsed = urlparse(url)
if parsed.scheme not in ["http", "https"]:
@ -40,7 +45,7 @@ def is_url_valid(url: str, debug: bool = False) -> bool:
except socket.gaierror:
logger.exception(f"failed to lookup url {url}")
_CACHE[parsed.hostname] = False
return False
raise URLLookupFailedError(f"failed to lookup url {url}")
logger.debug(f"{ip_address}")