Initial commit

This commit is contained in:
2026-02-01 09:31:38 +01:00
commit e02db93960
4396 changed files with 1511612 additions and 0 deletions

View File

@@ -0,0 +1,42 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
"""
Tweepy Twitter API library
"""
__version__ = '4.14.0'
__author__ = 'Joshua Roesslein'
__license__ = 'MIT'
from tweepy.api import API
from tweepy.auth import (
AppAuthHandler, OAuthHandler, OAuth1UserHandler, OAuth2AppHandler,
OAuth2BearerHandler, OAuth2UserHandler
)
from tweepy.cache import Cache, FileCache, MemoryCache
from tweepy.client import Client, Response
from tweepy.cursor import Cursor
from tweepy.direct_message_event import (
DirectMessageEvent, DIRECT_MESSAGE_EVENT_FIELDS, DM_EVENT_FIELDS
)
from tweepy.errors import (
BadRequest, Forbidden, HTTPException, NotFound, TooManyRequests,
TweepyException, TwitterServerError, Unauthorized
)
from tweepy.list import List, LIST_FIELDS
from tweepy.media import Media, MEDIA_FIELDS
from tweepy.pagination import Paginator
from tweepy.place import Place, PLACE_FIELDS
from tweepy.poll import Poll, POLL_FIELDS
from tweepy.space import PUBLIC_SPACE_FIELDS, Space, SPACE_FIELDS
from tweepy.streaming import (
StreamingClient, StreamResponse, StreamRule
)
from tweepy.tweet import (
PUBLIC_TWEET_FIELDS, ReferencedTweet, Tweet, TWEET_FIELDS
)
from tweepy.user import User, USER_FIELDS
# Global, unauthenticated instance of API
api = API()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
"""
Tweepy.asynchronous
Asynchronous interfaces with the Twitter API
"""
try:
import aiohttp
import async_lru
import oauthlib
except ModuleNotFoundError:
from tweepy.errors import TweepyException
raise TweepyException(
"tweepy.asynchronous requires aiohttp, async_lru, and oauthlib to be "
"installed"
)
from tweepy.asynchronous.client import AsyncClient
from tweepy.asynchronous.pagination import AsyncPaginator
from tweepy.asynchronous.streaming import AsyncStreamingClient

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,148 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from math import inf
import aiohttp
from tweepy.client import Response
class AsyncPaginator:
    """AsyncPaginator( \
        self, method, *args, limit=inf, pagination_token=None, **kwargs \
    )

    :class:`AsyncPaginator` can be used to paginate for any
    :class:`AsyncClient` methods that support pagination

    .. note::

        When the returned response from the method being passed is of type
        :class:`aiohttp.ClientResponse`, it will be deserialized in order to
        parse the pagination tokens, likely negating any potential performance
        benefits from using a :class:`aiohttp.ClientResponse` return type.

    .. versionadded:: 4.11

    Parameters
    ----------
    method
        :class:`AsyncClient` method to paginate for
    args
        Positional arguments to pass to ``method``
    limit
        Maximum number of requests to make to the API
    pagination_token
        Pagination token to start pagination with
    kwargs
        Keyword arguments to pass to ``method``
    """

    def __init__(self, method, *args, **kwargs):
        # Iteration options (limit, pagination_token, reverse) travel inside
        # kwargs and are forwarded to AsyncPaginationIterator on iteration
        self.method = method
        self.args = args
        self.kwargs = kwargs

    def __aiter__(self):
        return AsyncPaginationIterator(self.method, *self.args, **self.kwargs)

    def __reversed__(self):
        return AsyncPaginationIterator(
            self.method, *self.args, reverse=True, **self.kwargs
        )

    async def flatten(self, limit=inf):
        """Flatten paginated data

        Parameters
        ----------
        limit
            Maximum number of results to yield
        """
        if limit <= 0:
            return

        yielded = 0
        iterator = AsyncPaginationIterator(
            self.method, *self.args, **self.kwargs
        )
        async for response in iterator:
            if isinstance(response, Response):
                items = response.data or []
            elif isinstance(response, dict):
                items = response.get("data", [])
            else:
                raise RuntimeError(
                    "AsyncPaginator.flatten does not support the "
                    f"{type(response)} return type for "
                    f"{self.method.__qualname__}"
                )
            for item in items:
                yield item
                yielded += 1
                if yielded == limit:
                    return
class AsyncPaginationIterator:
    """Async iterator that fetches one page of API results per step."""

    def __init__(
        self, method, *args, limit=inf, pagination_token=None, reverse=False,
        **kwargs
    ):
        self.method = method
        self.args = args
        self.limit = limit
        self.kwargs = kwargs
        self.reverse = reverse

        # Seed the token for the direction being walked; the opposite
        # direction starts out unknown until the first response arrives.
        if reverse:
            self.previous_token = pagination_token
            self.next_token = None
        else:
            self.previous_token = None
            self.next_token = pagination_token

        self.count = 0

    def __aiter__(self):
        return self

    async def __anext__(self):
        token = self.previous_token if self.reverse else self.next_token

        # Stop when the request budget is spent, or when at least one page
        # has been fetched and there is no token left to continue with.
        if self.count >= self.limit or (self.count and token is None):
            raise StopAsyncIteration

        # https://twittercommunity.com/t/why-does-timeline-use-pagination-token-while-search-uses-next-token/150963
        if self.method.__name__ in (
            "search_all_tweets", "search_recent_tweets",
            "get_all_tweets_count"
        ):
            self.kwargs["next_token"] = token
        else:
            self.kwargs["pagination_token"] = token

        response = await self.method(*self.args, **self.kwargs)

        # Pagination tokens live in the response's "meta" object, whatever
        # the configured return type happens to be.
        if isinstance(response, Response):
            meta = response.meta
        elif isinstance(response, dict):
            meta = response.get("meta", {})
        elif isinstance(response, aiohttp.ClientResponse):
            meta = (await response.json()).get("meta", {})
        else:
            raise RuntimeError(
                f"Unknown {type(response)} return type for "
                f"{self.method.__qualname__}"
            )

        self.previous_token = meta.get("previous_token")
        self.next_token = meta.get("next_token")
        self.count += 1

        return response

View File

@@ -0,0 +1,641 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import asyncio
import json
import logging
from math import inf
from platform import python_version
import traceback
import aiohttp
import tweepy
from tweepy.asynchronous.client import AsyncBaseClient
from tweepy.client import Response
from tweepy.errors import TweepyException
from tweepy.streaming import StreamResponse, StreamRule
from tweepy.tweet import Tweet
log = logging.getLogger(__name__)
class AsyncBaseStream:
    """Base class implementing the connection and retry loop shared by the
    asynchronous streaming clients.
    """

    def __init__(self, *, max_retries=inf, proxy=None):
        # Maximum number of (re)connection attempts before giving up
        self.max_retries = max_retries
        # Optional proxy URL used when connecting to the stream
        self.proxy = proxy
        # aiohttp.ClientSession, created lazily by _connect()
        self.session = None
        # asyncio.Task running the stream, set by subclasses
        self.task = None
        self.user_agent = (
            f"Python/{python_version()} "
            f"aiohttp/{aiohttp.__version__} "
            f"Tweepy/{tweepy.__version__}"
        )

    async def _connect(
        self, method, url, params=None, headers=None, body=None,
        oauth_client=None, timeout=21
    ):
        """Connect to ``url`` and dispatch received stream lines until the
        task is cancelled or ``max_retries`` consecutive errors occur.

        Network errors back off linearly (0.25 s steps, capped at 16 s);
        HTTP errors back off exponentially (5 s doubling, capped at 320 s);
        420/429 rate-limit responses start the wait at 60 s, and 420
        responses keep doubling without the cap being applied.
        """
        error_count = 0
        # https://developer.twitter.com/en/docs/twitter-api/v1/tweets/filter-realtime/guides/connecting
        # https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/handling-disconnections
        # https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/integrate/handling-disconnections
        network_error_wait = 0
        network_error_wait_step = 0.25
        network_error_wait_max = 16
        http_error_wait = http_error_wait_start = 5
        http_error_wait_max = 320
        http_429_error_wait_start = 60

        # Reuse the session across reconnects; only recreate when missing or
        # already closed. sock_read timeout doubles as the keep-alive window.
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession(
                connector=aiohttp.TCPConnector(enable_cleanup_closed=True),
                timeout=aiohttp.ClientTimeout(sock_read=timeout)
            )
        self.session.headers["User-Agent"] = self.user_agent

        try:
            while error_count <= self.max_retries:
                try:
                    if oauth_client is not None:
                        url, headers, body = oauth_client.sign(
                            url, http_method=method, headers=headers, body=body
                        )
                    async with self.session.request(
                        method, url, params=params, headers=headers, data=body,
                        proxy=self.proxy
                    ) as resp:
                        if resp.status == 200:
                            # Successful connection resets all backoff state
                            error_count = 0
                            http_error_wait = http_error_wait_start
                            network_error_wait = 0

                            await self.on_connect()

                            # Empty lines are keep-alive signals; non-empty
                            # lines are payloads handed to on_data()
                            async for line in resp.content:
                                line = line.strip()
                                if line:
                                    await self.on_data(line)
                                else:
                                    await self.on_keep_alive()

                            await self.on_closed(resp)
                        else:
                            await self.on_request_error(resp.status)
                            # The error text is logged here instead of in
                            # on_request_error to keep on_request_error
                            # backwards-compatible. In a future version, the
                            # ClientResponse should be passed to
                            # on_request_error.
                            response_text = await resp.text()
                            log.error(
                                "HTTP error response text: %s", response_text
                            )

                            error_count += 1

                            if resp.status in (420, 429):
                                if http_error_wait < http_429_error_wait_start:
                                    http_error_wait = http_429_error_wait_start

                            await asyncio.sleep(http_error_wait)

                            # Exponential backoff; the cap is deliberately
                            # not applied for 420 responses
                            http_error_wait *= 2
                            if resp.status != 420:
                                if http_error_wait > http_error_wait_max:
                                    http_error_wait = http_error_wait_max
                except (aiohttp.ClientConnectionError,
                        aiohttp.ClientPayloadError) as e:
                    await self.on_connection_error()
                    # The error text is logged here instead of in
                    # on_connection_error to keep on_connection_error
                    # backwards-compatible. In a future version, the error
                    # should be passed to on_connection_error.
                    log.error(
                        "Connection error: %s",
                        "".join(
                            traceback.format_exception_only(type(e), e)
                        ).rstrip()
                    )

                    # Linear backoff for network-level failures
                    await asyncio.sleep(network_error_wait)

                    network_error_wait += network_error_wait_step
                    if network_error_wait > network_error_wait_max:
                        network_error_wait = network_error_wait_max
        except asyncio.CancelledError:
            # disconnect() cancels the task; exit quietly
            return
        except Exception as e:
            await self.on_exception(e)
        finally:
            await self.session.close()
            await self.on_disconnect()

    def disconnect(self):
        """Disconnect the stream"""
        if self.task is not None:
            self.task.cancel()

    async def on_closed(self, resp):
        """|coroutine|

        This is called when the stream has been closed by Twitter.

        Parameters
        ----------
        response : aiohttp.ClientResponse
            The response from Twitter
        """
        log.error("Stream connection closed by Twitter")

    async def on_connect(self):
        """|coroutine|

        This is called after successfully connecting to the streaming API.
        """
        log.info("Stream connected")

    async def on_connection_error(self):
        """|coroutine|

        This is called when the stream connection errors or times out.
        """
        log.error("Stream connection has errored or timed out")

    async def on_disconnect(self):
        """|coroutine|

        This is called when the stream has disconnected.
        """
        log.info("Stream disconnected")

    async def on_exception(self, exception):
        """|coroutine|

        This is called when an unhandled exception occurs.

        Parameters
        ----------
        exception : Exception
            The unhandled exception
        """
        log.exception("Stream encountered an exception")

    async def on_keep_alive(self):
        """|coroutine|

        This is called when a keep-alive signal is received.
        """
        log.debug("Received keep-alive signal")

    async def on_request_error(self, status_code):
        """|coroutine|

        This is called when a non-200 HTTP status code is encountered.

        Parameters
        ----------
        status_code : int
            The HTTP status code encountered
        """
        log.error("Stream encountered HTTP Error: %d", status_code)
class AsyncStreamingClient(AsyncBaseClient, AsyncBaseStream):
    """Stream realtime Tweets asynchronously with Twitter API v2

    .. versionadded:: 4.10

    Parameters
    ----------
    bearer_token : str
        Twitter API Bearer Token
    return_type : type[dict | requests.Response | Response]
        Type to return from requests to the API
    wait_on_rate_limit : bool
        Whether or not to wait before retrying when a rate limit is
        encountered. This applies to requests besides those that connect to a
        stream (see ``max_retries``).
    max_retries: int | None
        Number of times to attempt to (re)connect the stream.
    proxy : str | None
        URL of the proxy to use when connecting to the stream

    Attributes
    ----------
    session : aiohttp.ClientSession | None
        Aiohttp client session used to connect to the API
    task : asyncio.Task | None
        The task running the stream
    user_agent : str
        User agent used when connecting to the API
    """

    def __init__(self, bearer_token, *, return_type=Response,
                 wait_on_rate_limit=False, **kwargs):
        """__init__( \
            bearer_token, *, return_type=Response, wait_on_rate_limit=False, \
            max_retries=inf, proxy=None \
        )
        """
        # Both bases are initialized explicitly: AsyncBaseClient gets the
        # request configuration, AsyncBaseStream the stream/retry settings
        AsyncBaseClient.__init__(self, bearer_token, return_type=return_type,
                                 wait_on_rate_limit=wait_on_rate_limit)
        AsyncBaseStream.__init__(self, **kwargs)

    async def _connect(self, method, endpoint, **kwargs):
        # Streaming endpoints authenticate with a bearer token directly,
        # rather than going through the base client's request signing
        url = f"https://api.twitter.com/2/tweets/{endpoint}/stream"
        headers = {"Authorization": f"Bearer {self.bearer_token}"}
        await super()._connect(method, url, headers=headers, **kwargs)

    def _process_data(self, data, data_type=None):
        # Rule payloads are materialized as StreamRule objects here; all
        # other data types are delegated to the base client
        if data_type is StreamRule:
            if isinstance(data, list):
                rules = []
                for rule in data:
                    if "tag" in rule:
                        rules.append(StreamRule(
                            value=rule["value"], id=rule["id"], tag=rule["tag"]
                        ))
                    else:
                        rules.append(StreamRule(value=rule["value"],
                                                id=rule["id"]))
                return rules
            elif data is not None:
                if "tag" in data:
                    return StreamRule(value=data["value"], id=data["id"],
                                      tag=data["tag"])
                else:
                    return StreamRule(value=data["value"], id=data["id"])
        else:
            return super()._process_data(data, data_type=data_type)

    async def add_rules(self, add, **params):
        """add_rules(add, *, dry_run)

        |coroutine|

        Add rules to filtered stream.

        Parameters
        ----------
        add : list[StreamRule] | StreamRule
            Specifies the operation you want to perform on the rules.
        dry_run : bool
            Set to true to test the syntax of your rule without submitting it.
            This is useful if you want to check the syntax of a rule before
            removing one or more of your existing rules.

        Returns
        -------
        dict | requests.Response | Response

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules
        """
        json = {"add": []}
        if isinstance(add, StreamRule):
            add = (add,)
        for rule in add:
            # The "tag" key must be omitted entirely when a rule is untagged
            if rule.tag is not None:
                json["add"].append({"value": rule.value, "tag": rule.tag})
            else:
                json["add"].append({"value": rule.value})
        return await self._make_request(
            "POST", "/2/tweets/search/stream/rules", params=params,
            endpoint_parameters=("dry_run",), json=json, data_type=StreamRule
        )

    async def delete_rules(self, ids, **params):
        """delete_rules(ids, *, dry_run)

        |coroutine|

        Delete rules from filtered stream.

        Parameters
        ----------
        ids : int | str | list[int | str | StreamRule] | StreamRule
            Array of rule IDs, each one representing a rule already active in
            your stream. IDs must be submitted as strings.
        dry_run : bool
            Set to true to test the syntax of your rule without submitting it.
            This is useful if you want to check the syntax of a rule before
            removing one or more of your existing rules.

        Returns
        -------
        dict | requests.Response | Response

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules
        """
        json = {"delete": {"ids": []}}
        if isinstance(ids, (int, str, StreamRule)):
            ids = (ids,)
        for id in ids:
            # The API requires rule IDs to be submitted as strings
            if isinstance(id, StreamRule):
                json["delete"]["ids"].append(str(id.id))
            else:
                json["delete"]["ids"].append(str(id))
        return await self._make_request(
            "POST", "/2/tweets/search/stream/rules", params=params,
            endpoint_parameters=("dry_run",), json=json, data_type=StreamRule
        )

    def filter(self, **params):
        """filter( \
            *, backfill_minutes=None, expansions=None, media_fields=None, \
            place_fields=None, poll_fields=None, tweet_fields=None, \
            user_fields=None \
        )

        Streams Tweets in real-time based on a specific set of filter rules.

        If you are using the academic research product track, you can connect
        up to two `redundant connections <filter redundant connections_>`_ to
        maximize your streaming up-time.

        The Tweets returned by this endpoint count towards the Project-level
        `Tweet cap`_.

        Parameters
        ----------
        backfill_minutes : int | None
            By passing this parameter, you can request up to five (5) minutes
            worth of streaming data that you might have missed during a
            disconnection to be delivered to you upon reconnection. The
            backfilled Tweets will automatically flow through the reconnected
            stream, with older Tweets generally being delivered before any
            newly matching Tweets. You must include a whole number between 1
            and 5 as the value to this parameter.

            This feature will deliver duplicate Tweets, meaning that if you
            were disconnected for 90 seconds, and you requested two minutes of
            backfill, you will receive 30 seconds worth of duplicate Tweets.
            Due to this, you should make sure your system is tolerant of
            duplicate data.

            This feature is currently only available to the Academic Research
            product track.
        expansions : list[str] | str
            :ref:`expansions_parameter`
        media_fields : list[str] | str
            :ref:`media_fields_parameter`
        place_fields : list[str] | str
            :ref:`place_fields_parameter`
        poll_fields : list[str] | str
            :ref:`poll_fields_parameter`
        tweet_fields : list[str] | str
            :ref:`tweet_fields_parameter`
        user_fields : list[str] | str
            :ref:`user_fields_parameter`

        Raises
        ------
        TweepyException
            When the stream is already connected

        Returns
        -------
        asyncio.Task
            The task running the stream

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream

        .. _filter redundant connections: https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/recovery-and-redundancy-features
        .. _Tweet cap: https://developer.twitter.com/en/docs/twitter-api/tweet-caps
        """
        if self.task is not None and not self.task.done():
            raise TweepyException("Stream is already connected")

        endpoint = "search"

        params = self._process_params(
            params, endpoint_parameters=(
                "backfill_minutes", "expansions", "media.fields",
                "place.fields", "poll.fields", "tweet.fields", "user.fields"
            )
        )

        self.task = asyncio.create_task(
            self._connect("GET", endpoint, params=params)
        )
        # Use name parameter when support for Python 3.7 is dropped
        return self.task

    async def get_rules(self, **params):
        """get_rules(*, ids)

        |coroutine|

        Return a list of rules currently active on the streaming endpoint,
        either as a list or individually.

        Parameters
        ----------
        ids : list[str] | str
            Comma-separated list of rule IDs. If omitted, all rules are
            returned.

        Returns
        -------
        dict | requests.Response | Response

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream-rules
        """
        return await self._make_request(
            "GET", "/2/tweets/search/stream/rules", params=params,
            endpoint_parameters=("ids",), data_type=StreamRule
        )

    def sample(self, **params):
        """sample( \
            *, backfill_minutes=None, expansions=None, media_fields=None, \
            place_fields=None, poll_fields=None, tweet_fields=None, \
            user_fields=None \
        )

        Streams about 1% of all Tweets in real-time.

        If you are using the academic research product track, you can connect
        up to two `redundant connections <sample redundant connections_>`_ to
        maximize your streaming up-time.

        Parameters
        ----------
        backfill_minutes : int | None
            By passing this parameter, you can request up to five (5) minutes
            worth of streaming data that you might have missed during a
            disconnection to be delivered to you upon reconnection. The
            backfilled Tweets will automatically flow through the reconnected
            stream, with older Tweets generally being delivered before any
            newly matching Tweets. You must include a whole number between 1
            and 5 as the value to this parameter.

            This feature will deliver duplicate Tweets, meaning that if you
            were disconnected for 90 seconds, and you requested two minutes of
            backfill, you will receive 30 seconds worth of duplicate Tweets.
            Due to this, you should make sure your system is tolerant of
            duplicate data.

            This feature is currently only available to the Academic Research
            product track.
        expansions : list[str] | str
            :ref:`expansions_parameter`
        media_fields : list[str] | str
            :ref:`media_fields_parameter`
        place_fields : list[str] | str
            :ref:`place_fields_parameter`
        poll_fields : list[str] | str
            :ref:`poll_fields_parameter`
        tweet_fields : list[str] | str
            :ref:`tweet_fields_parameter`
        user_fields : list[str] | str
            :ref:`user_fields_parameter`

        Raises
        ------
        TweepyException
            When the stream is already connected

        Returns
        -------
        asyncio.Task
            The task running the stream

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/api-reference/get-tweets-sample-stream

        .. _sample redundant connections: https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/integrate/recovery-and-redundancy-features
        """
        if self.task is not None and not self.task.done():
            raise TweepyException("Stream is already connected")

        endpoint = "sample"

        params = self._process_params(
            params, endpoint_parameters=(
                "backfill_minutes", "expansions", "media.fields",
                "place.fields", "poll.fields", "tweet.fields", "user.fields"
            )
        )

        self.task = asyncio.create_task(
            self._connect("GET", endpoint, params=params)
        )
        # Use name parameter when support for Python 3.7 is dropped
        return self.task

    async def on_data(self, raw_data):
        """|coroutine|

        This is called when raw data is received from the stream.
        This method handles sending the data to other methods.

        Parameters
        ----------
        raw_data : JSON
            The raw data from the stream

        References
        ----------
        https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/consuming-streaming-data
        """
        data = json.loads(raw_data)

        tweet = None
        includes = {}
        errors = []
        matching_rules = []

        # Each section of the payload is dispatched to its dedicated handler
        # before the combined StreamResponse is passed to on_response()
        if "data" in data:
            tweet = Tweet(data["data"])
            await self.on_tweet(tweet)
        if "includes" in data:
            includes = self._process_includes(data["includes"])
            await self.on_includes(includes)
        if "errors" in data:
            errors = data["errors"]
            await self.on_errors(errors)
        if "matching_rules" in data:
            matching_rules = [
                StreamRule(id=rule["id"], tag=rule["tag"])
                for rule in data["matching_rules"]
            ]
            await self.on_matching_rules(matching_rules)

        await self.on_response(
            StreamResponse(tweet, includes, errors, matching_rules)
        )

    async def on_tweet(self, tweet):
        """|coroutine|

        This is called when a Tweet is received.

        Parameters
        ----------
        tweet : Tweet
            The Tweet received
        """
        pass

    async def on_includes(self, includes):
        """|coroutine|

        This is called when includes are received.

        Parameters
        ----------
        includes : dict
            The includes received
        """
        pass

    async def on_errors(self, errors):
        """|coroutine|

        This is called when errors are received.

        Parameters
        ----------
        errors : dict
            The errors received
        """
        log.error("Received errors: %s", errors)

    async def on_matching_rules(self, matching_rules):
        """|coroutine|

        This is called when matching rules are received.

        Parameters
        ----------
        matching_rules : list[StreamRule]
            The matching rules received
        """
        pass

    async def on_response(self, response):
        """|coroutine|

        This is called when a response is received.

        Parameters
        ----------
        response : StreamResponse
            The response received
        """
        log.debug("Received response: %s", response)

View File

@@ -0,0 +1,223 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import logging
import warnings
import requests
from requests.auth import AuthBase, HTTPBasicAuth
from requests_oauthlib import OAuth1, OAuth1Session, OAuth2Session
from tweepy.errors import TweepyException
WARNING_MESSAGE = """Warning! Due to a Twitter API bug, signin_with_twitter
and access_type don't always play nice together. Details
https://dev.twitter.com/discussions/21281"""
log = logging.getLogger(__name__)
class OAuth1UserHandler:
    """OAuth 1.0a User Context authentication handler

    .. versionchanged:: 4.5
        Renamed from :class:`OAuthHandler`
    """

    def __init__(self, consumer_key, consumer_secret, access_token=None,
                 access_token_secret=None, callback=None):
        # Validate credential types up front so misconfiguration fails fast
        for label, credential in (
            ("Consumer key", consumer_key),
            ("Consumer secret", consumer_secret),
        ):
            if not isinstance(credential, (str, bytes)):
                raise TypeError(
                    f"{label} must be string or bytes, not "
                    f"{type(credential).__name__}"
                )

        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.access_token = access_token
        self.access_token_secret = access_token_secret
        self.callback = callback
        self.username = None
        self.request_token = {}
        self.oauth = OAuth1Session(
            consumer_key, client_secret=consumer_secret,
            callback_uri=self.callback
        )

    def apply_auth(self):
        """Return an OAuth1 object suitable for signing requests."""
        return OAuth1(
            self.consumer_key, client_secret=self.consumer_secret,
            resource_owner_key=self.access_token,
            resource_owner_secret=self.access_token_secret, decoding=None
        )

    def _get_oauth_url(self, endpoint):
        # All OAuth 1.0a endpoints share the same base path
        return f'https://api.twitter.com/oauth/{endpoint}'

    def _get_request_token(self, access_type=None):
        try:
            url = self._get_oauth_url('request_token')
            if access_type:
                url += f'?x_auth_access_type={access_type}'
            return self.oauth.fetch_request_token(url)
        except Exception as e:
            raise TweepyException(e)

    def get_authorization_url(self, signin_with_twitter=False,
                              access_type=None):
        """Get the authorization URL to redirect the user to"""
        try:
            if signin_with_twitter:
                # "Sign in with Twitter" and access_type may conflict; warn
                if access_type:
                    log.warning(WARNING_MESSAGE)
                url = self._get_oauth_url('authenticate')
            else:
                url = self._get_oauth_url('authorize')
            self.request_token = self._get_request_token(
                access_type=access_type
            )
            return self.oauth.authorization_url(url)
        except Exception as e:
            raise TweepyException(e)

    def get_access_token(self, verifier=None):
        """After user has authorized the app, get access token and secret with
        verifier
        """
        try:
            url = self._get_oauth_url('access_token')
            # A fresh session carrying the request token and verifier is
            # needed to complete the three-legged flow
            self.oauth = OAuth1Session(
                self.consumer_key, client_secret=self.consumer_secret,
                resource_owner_key=self.request_token['oauth_token'],
                resource_owner_secret=self.request_token['oauth_token_secret'],
                verifier=verifier, callback_uri=self.callback
            )
            credentials = self.oauth.fetch_access_token(url)
            self.access_token = credentials['oauth_token']
            self.access_token_secret = credentials['oauth_token_secret']
            return self.access_token, self.access_token_secret
        except Exception as e:
            raise TweepyException(e)

    def set_access_token(self, key, secret):
        """
        .. deprecated:: 4.5
            Set through initialization instead.
        """
        self.access_token = key
        self.access_token_secret = secret
class OAuthHandler(OAuth1UserHandler):
    """Alias for :class:`OAuth1UserHandler`

    .. deprecated:: 4.5
        Use :class:`OAuth1UserHandler` instead.
    """

    def __init__(self, consumer_key, consumer_secret, access_token=None,
                 access_token_secret=None, callback=None):
        # Warn before delegating so callers migrate to the renamed class
        warnings.warn(
            "OAuthHandler is deprecated; use OAuth1UserHandler instead.",
            DeprecationWarning
        )
        super().__init__(
            consumer_key, consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret,
            callback=callback
        )
class OAuth2AppHandler:
    """OAuth 2.0 Bearer Token (App-Only) using API / Consumer key and secret
    authentication handler

    .. versionchanged:: 4.5
        Renamed from :class:`AppAuthHandler`
    """

    def __init__(self, consumer_key, consumer_secret):
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self._bearer_token = ''

        # Exchange the app credentials for a bearer token immediately
        response = requests.post(
            'https://api.twitter.com/oauth2/token',
            auth=(self.consumer_key, self.consumer_secret),
            data={'grant_type': 'client_credentials'}
        )
        payload = response.json()
        token_type = payload.get('token_type')
        if token_type != 'bearer':
            raise TweepyException(
                'Expected token_type to equal "bearer", '
                f'but got {token_type} instead'
            )

        self._bearer_token = payload['access_token']

    def apply_auth(self):
        """Wrap the fetched bearer token in an auth handler for requests."""
        return OAuth2BearerHandler(self._bearer_token)
class AppAuthHandler(OAuth2AppHandler):
    """Alias for :class:`OAuth2AppHandler`

    .. deprecated:: 4.5
        Use :class:`OAuth2AppHandler` instead.
    """

    def __init__(self, consumer_key, consumer_secret):
        # Warn before delegating so callers migrate to the renamed class
        warnings.warn(
            "AppAuthHandler is deprecated; use OAuth2AppHandler instead.",
            DeprecationWarning
        )
        super().__init__(consumer_key, consumer_secret)
class OAuth2BearerHandler(AuthBase):
    """OAuth 2.0 Bearer Token (App-Only) authentication handler

    .. versionadded:: 4.5
    """

    def __init__(self, bearer_token):
        self.bearer_token = bearer_token

    def __call__(self, request):
        # requests invokes this hook to attach auth to each outgoing request
        request.headers['Authorization'] = 'Bearer ' + self.bearer_token
        return request

    def apply_auth(self):
        """Allow this object to be used wherever Tweepy expects an auth
        handler rather than a requests auth object.
        """
        return self
class OAuth2UserHandler(OAuth2Session):
    """OAuth 2.0 Authorization Code Flow with PKCE (User Context)
    authentication handler

    .. versionadded:: 4.5
    """

    def __init__(self, *, client_id, redirect_uri, scope, client_secret=None):
        super().__init__(client_id, redirect_uri=redirect_uri, scope=scope)
        # Confidential clients authenticate with HTTP Basic auth when
        # exchanging the authorization code; public clients send no auth
        if client_secret is not None:
            self.auth = HTTPBasicAuth(client_id, client_secret)
        else:
            self.auth = None

    def get_authorization_url(self):
        """Get the authorization URL to redirect the user to"""
        # Generates a PKCE verifier/challenge pair using the S256 method;
        # the verifier is retained on the underlying oauthlib client so
        # fetch_token() can send it later. NOTE(review): this means
        # fetch_token must be called on the same instance that produced
        # the authorization URL.
        authorization_url, state = self.authorization_url(
            "https://twitter.com/i/oauth2/authorize",
            code_challenge=self._client.create_code_challenge(
                self._client.create_code_verifier(128), "S256"
            ), code_challenge_method="S256"
        )
        return authorization_url

    def fetch_token(self, authorization_response):
        """After user has authorized the app, fetch access token with
        authorization response URL
        """
        return super().fetch_token(
            "https://api.twitter.com/2/oauth2/token",
            authorization_response=authorization_response,
            auth=self.auth,
            include_client_id=True,
            # PKCE verifier generated during get_authorization_url()
            code_verifier=self._client.code_verifier
        )

View File

@@ -0,0 +1,427 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import datetime
import hashlib
import logging
import pickle
import threading
import time
import os
try:
import fcntl
except ImportError:
# Probably on a windows system
# TODO: use win32file
pass
log = logging.getLogger(__name__)
class Cache:
    """Abstract cache interface.

    Concrete caches store ``(key, value)`` pairs and expire entries after a
    configurable number of seconds.
    """

    def __init__(self, timeout=60):
        """Initialize the cache

        timeout: number of seconds to keep a cached entry
        """
        self.timeout = timeout

    def store(self, key, value):
        """Add new record to cache

        key: entry key
        value: data of entry
        """
        raise NotImplementedError

    def get(self, key, timeout=None):
        """Get cached entry if exists and not expired

        key: which entry to get
        timeout: override timeout with this value [optional]
        """
        raise NotImplementedError

    def count(self):
        """Get count of entries currently stored in cache"""
        raise NotImplementedError

    def cleanup(self):
        """Delete any expired entries in cache."""
        raise NotImplementedError

    def flush(self):
        """Delete all cached entries"""
        raise NotImplementedError
class MemoryCache(Cache):
    """In-memory cache

    Entries are stored as ``key -> (stored_time, value)`` in a dict guarded
    by a lock, so a single instance may be shared between threads.
    """

    def __init__(self, timeout=60):
        Cache.__init__(self, timeout)
        self._entries = {}
        self.lock = threading.Lock()

    def __getstate__(self):
        # pickle: locks are not picklable, so persist only entries + timeout
        return {'entries': self._entries, 'timeout': self.timeout}

    def __setstate__(self, state):
        # unpickle: recreate a fresh lock for the restored cache
        self.lock = threading.Lock()
        self._entries = state['entries']
        self.timeout = state['timeout']

    def _is_expired(self, entry, timeout):
        # A timeout <= 0 disables expiration entirely
        return timeout > 0 and (time.time() - entry[0]) >= timeout

    def store(self, key, value):
        """Add new record to cache

        key: entry key
        value: data of entry
        """
        # `with` guarantees the lock is released even if the body raises;
        # the original acquire()/release() pair could leave it held
        with self.lock:
            self._entries[key] = (time.time(), value)

    def get(self, key, timeout=None):
        """Get cached entry if exists and not expired

        key: which entry to get
        timeout: override timeout with this value [optional]
        """
        with self.lock:
            # check to see if we have this key
            entry = self._entries.get(key)
            if not entry:
                # no hit, return nothing
                return None
            # use provided timeout in arguments if provided
            # otherwise use the one provided during init.
            if timeout is None:
                timeout = self.timeout
            # make sure entry is not expired
            if self._is_expired(entry, timeout):
                # entry expired, delete and return nothing
                del self._entries[key]
                return None
            # entry found and not expired, return it
            return entry[1]

    def count(self):
        """Get count of entries currently stored in cache"""
        return len(self._entries)

    def cleanup(self):
        """Delete any expired entries in cache."""
        with self.lock:
            # iterate a snapshot so deletion does not disturb iteration
            for key, entry in list(self._entries.items()):
                if self._is_expired(entry, self.timeout):
                    del self._entries[key]

    def flush(self):
        """Delete all cached entries"""
        with self.lock:
            self._entries.clear()
class FileCache(Cache):
    """File-based cache.

    Each entry is pickled as a ``(created_time, value)`` pair into a file
    named after the MD5 hex digest of its key, inside ``cache_dir``.  Access
    is guarded both by an in-process ``threading.Lock`` (shared per cache
    directory) and by a per-file advisory lock (``fcntl`` on POSIX; not
    implemented on Windows).
    """

    # locks used to make cache thread-safe
    # (one lock per cache_dir, shared by every FileCache instance pointing
    # at that directory in this process)
    cache_locks = {}

    def __init__(self, cache_dir, timeout=60):
        Cache.__init__(self, timeout)
        if os.path.exists(cache_dir) is False:
            os.mkdir(cache_dir)
        self.cache_dir = cache_dir
        if cache_dir in FileCache.cache_locks:
            self.lock = FileCache.cache_locks[cache_dir]
        else:
            self.lock = threading.Lock()
            FileCache.cache_locks[cache_dir] = self.lock

        # pick the inter-process file-locking strategy for this platform
        if os.name == 'posix':
            self._lock_file = self._lock_file_posix
            self._unlock_file = self._unlock_file_posix
        elif os.name == 'nt':
            self._lock_file = self._lock_file_win32
            self._unlock_file = self._unlock_file_win32
        else:
            log.warning('FileCache locking not supported on this system!')
            self._lock_file = self._lock_file_dummy
            self._unlock_file = self._unlock_file_dummy

    def _get_path(self, key):
        # Map a cache key to a file path via its MD5 hex digest
        md5 = hashlib.md5()
        md5.update(key.encode('utf-8'))
        return os.path.join(self.cache_dir, md5.hexdigest())

    def _lock_file_dummy(self, path, exclusive=True):
        # No-op fallback when the platform offers no file locking
        return None

    def _unlock_file_dummy(self, lock):
        return

    def _lock_file_posix(self, path, exclusive=True):
        """Acquire an fcntl lock on ``path + '.lock'``.

        exclusive=True takes a write (LOCK_EX) lock; otherwise a shared
        read (LOCK_SH) lock.  Returns the open lock-file handle, or None
        when the lock file vanished while we were acquiring it.
        """
        lock_path = path + '.lock'
        if exclusive is True:
            f_lock = open(lock_path, 'w')
            fcntl.lockf(f_lock, fcntl.LOCK_EX)
        else:
            f_lock = open(lock_path, 'r')
            fcntl.lockf(f_lock, fcntl.LOCK_SH)
        if os.path.exists(lock_path) is False:
            # another process removed the lock file underneath us; give up
            f_lock.close()
            return None
        return f_lock

    def _unlock_file_posix(self, lock):
        # closing the handle releases the fcntl lock
        lock.close()

    def _lock_file_win32(self, path, exclusive=True):
        # TODO: implement
        return None

    def _unlock_file_win32(self, lock):
        # TODO: implement
        return

    def _delete_file(self, path):
        # Remove a cache entry file along with its companion lock file
        os.remove(path)
        if os.path.exists(path + '.lock'):
            os.remove(path + '.lock')

    def store(self, key, value):
        """Pickle ``(now, value)`` into the file derived from ``key``."""
        path = self._get_path(key)
        self.lock.acquire()
        try:
            # acquire lock and open file
            f_lock = self._lock_file(path)
            datafile = open(path, 'wb')
            # write data
            pickle.dump((time.time(), value), datafile)
            # close and unlock file
            datafile.close()
            self._unlock_file(f_lock)
        finally:
            self.lock.release()

    def get(self, key, timeout=None):
        """Return the cached value for ``key``, or None if absent/expired."""
        return self._get(self._get_path(key), timeout)

    def _get(self, path, timeout):
        if os.path.exists(path) is False:
            # no record
            return None
        self.lock.acquire()
        try:
            # acquire lock and open
            f_lock = self._lock_file(path, False)
            datafile = open(path, 'rb')
            # read pickled object
            created_time, value = pickle.load(datafile)
            datafile.close()
            # check if value is expired
            if timeout is None:
                timeout = self.timeout
            if timeout > 0:
                if (time.time() - created_time) >= timeout:
                    # expired! delete from cache
                    value = None
                    self._delete_file(path)
            # unlock and return result
            self._unlock_file(f_lock)
            return value
        finally:
            self.lock.release()

    def count(self):
        """Number of entry files in the cache directory (lock files excluded)."""
        c = 0
        for entry in os.listdir(self.cache_dir):
            if entry.endswith('.lock'):
                continue
            c += 1
        return c

    def cleanup(self):
        """Evict expired entries (reading each entry triggers its expiry)."""
        for entry in os.listdir(self.cache_dir):
            if entry.endswith('.lock'):
                continue
            self._get(os.path.join(self.cache_dir, entry), None)

    def flush(self):
        """Delete every entry file (and its lock file) in the cache directory."""
        for entry in os.listdir(self.cache_dir):
            if entry.endswith('.lock'):
                continue
            self._delete_file(os.path.join(self.cache_dir, entry))
class MemCacheCache(Cache):
    """Cache backed by a memcached client.

    NOTE(review): ``count``, ``cleanup`` and ``flush`` are not supported by
    this backend and raise ``NotImplementedError`` (earlier docstrings
    misleadingly described them as returning 0 / being no-ops).
    """

    def __init__(self, client, timeout=60):
        """Initialize the cache

        client: The memcache client
        timeout: number of seconds to keep a cached entry
        """
        self.client = client
        self.timeout = timeout

    def store(self, key, value):
        """Add new record to cache

        key: entry key
        value: data of entry
        """
        self.client.set(key, value, time=self.timeout)

    def get(self, key, timeout=None):
        """Get cached entry if exists and not expired

        key: which entry to get
        timeout: ignored here -- memcached expiry is fixed at store time,
            so a per-call override cannot be honored
        """
        return self.client.get(key)

    def count(self):
        """Not supported by this backend; raises NotImplementedError."""
        raise NotImplementedError

    def cleanup(self):
        """Not supported; memcached expires entries on its own.
        Raises NotImplementedError."""
        raise NotImplementedError

    def flush(self):
        """Not supported by this backend; raises NotImplementedError."""
        raise NotImplementedError
class RedisCache(Cache):
    """Cache whose entries live in a Redis server.

    Values are pickled together with their creation timestamp.  Every key is
    prefixed with ``pre_identifier`` (making tweepy keys easy to spot in the
    server) and also tracked in the Redis set ``keys_container`` so the cache
    can enumerate its own entries.
    """

    def __init__(self, client,
                 timeout=60,
                 keys_container='tweepy:keys',
                 pre_identifier='tweepy:'):
        Cache.__init__(self, timeout)
        self.client = client
        self.keys_container = keys_container
        self.pre_identifier = pre_identifier

    def _is_expired(self, entry, timeout):
        """Return True when ``entry`` (a ``(created, value)`` pair) is stale."""
        if timeout <= 0:
            # non-positive timeout means entries never expire
            return False
        return (time.time() - entry[0]) >= timeout

    def store(self, key, value):
        """Store the key, value pair in our redis server."""
        full_key = self.pre_identifier + key
        # batch the commands through a pipeline: set the pickled
        # (timestamp, value) pair, its server-side expiry, and register the
        # key in the tracking set, then execute everything in one round trip
        pipe = self.client.pipeline()
        pipe.set(full_key, pickle.dumps((time.time(), value)))
        pipe.expire(full_key, self.timeout)
        pipe.sadd(self.keys_container, full_key)
        pipe.execute()

    def get(self, key, timeout=None):
        """Return the cached value for ``key``, or None if missing/expired."""
        full_key = self.pre_identifier + key
        raw = self.client.get(full_key)
        if not raw:
            # no hit
            return None
        entry = pickle.loads(raw)
        # prefer the per-call timeout, falling back to the configured one
        effective_timeout = self.timeout if timeout is None else timeout
        if self._is_expired(entry, effective_timeout):
            # stale: drop it and report a miss
            self.delete_entry(full_key)
            return None
        return entry[1]

    def count(self):
        """Number of tracked keys.

        Note: not very efficient -- it fetches the whole key set from the
        redis server just to measure its size.
        """
        return len(self.client.smembers(self.keys_container))

    def delete_entry(self, key):
        """Remove ``key`` (already prefixed) from Redis and the tracking set."""
        pipe = self.client.pipeline()
        pipe.srem(self.keys_container, key)
        pipe.delete(key)
        pipe.execute()

    def cleanup(self):
        """Evict every expired entry."""
        for key in self.client.smembers(self.keys_container):
            raw = self.client.get(key)
            if not raw:
                continue
            if self._is_expired(pickle.loads(raw), self.timeout):
                self.delete_entry(key)

    def flush(self):
        """Delete every entry tracked by this cache."""
        for key in self.client.smembers(self.keys_container):
            self.delete_entry(key)
class MongodbCache(Cache):
    """A simple pickle-based MongoDB cache system."""

    def __init__(self, db, timeout=3600, collection='tweepy_cache'):
        """Should receive a "database" cursor from pymongo."""
        Cache.__init__(self, timeout)
        self.timeout = timeout
        self.col = db[collection]
        # MongoDB's TTL monitor removes documents once 'created' is older
        # than `timeout` seconds, so cleanup() has nothing to do
        self.col.create_index('created', expireAfterSeconds=timeout)

    def store(self, key, value):
        """Pickle ``value`` and insert it under ``key`` with a creation time."""
        from bson.binary import Binary

        now = datetime.datetime.utcnow()
        blob = Binary(pickle.dumps(value))
        # Collection.insert() was removed in PyMongo 4; insert_one() is the
        # supported equivalent (available since PyMongo 3.0)
        self.col.insert_one({'created': now, '_id': key, 'value': blob})

    def get(self, key, timeout=None):
        """Return the unpickled value for ``key``, or None if not cached.

        A per-call ``timeout`` override is not supported by this backend.
        """
        if timeout:
            raise NotImplementedError
        obj = self.col.find_one({'_id': key})
        if obj:
            return pickle.loads(obj['value'])

    def count(self):
        """Number of cached documents."""
        # Cursor.count() was removed in PyMongo 4; count_documents() is the
        # supported replacement
        return self.col.count_documents({})

    def delete_entry(self, key):
        """Delete the cache document stored under ``key``."""
        # Collection.remove() was removed in PyMongo 4
        return self.col.delete_one({'_id': key})

    def cleanup(self):
        """MongoDB will automatically clear expired keys."""
        pass

    def flush(self):
        """Drop every cached document and recreate the TTL index."""
        self.col.drop()
        self.col.create_index('created', expireAfterSeconds=self.timeout)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,305 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from math import inf
from tweepy.errors import TweepyException
from tweepy.parsers import ModelParser, RawParser
class Cursor:
    """:class:`Cursor` can be used to paginate for any :class:`API` methods that
    support pagination

    Parameters
    ----------
    method
        :class:`API` method to paginate for
    args
        Positional arguments to pass to ``method``
    kwargs
        Keyword arguments to pass to ``method``
    """

    def __init__(self, method, *args, **kwargs):
        if not hasattr(method, 'pagination_mode'):
            raise TweepyException('This method does not perform pagination')
        # map each declared pagination mode to the iterator implementing it
        iterator_classes = {
            'cursor': CursorIterator,
            'dm_cursor': DMCursorIterator,
            'id': IdIterator,
            'next': NextIterator,
            'page': PageIterator,
        }
        iterator_class = iterator_classes.get(method.pagination_mode)
        if iterator_class is None:
            raise TweepyException('Invalid pagination mode.')
        self.iterator = iterator_class(method, *args, **kwargs)

    def pages(self, limit=inf):
        """Retrieve the page for each request

        Parameters
        ----------
        limit
            Maximum number of pages to iterate over

        Returns
        -------
        CursorIterator or DMCursorIterator or IdIterator or NextIterator or \
        PageIterator
            Iterator to iterate through pages
        """
        self.iterator.limit = limit
        return self.iterator

    def items(self, limit=inf):
        """Retrieve the items in each page/request

        Parameters
        ----------
        limit
            Maximum number of items to iterate over

        Returns
        -------
        ItemIterator
            Iterator to iterate through items
        """
        item_iterator = ItemIterator(self.iterator)
        item_iterator.limit = limit
        return item_iterator
class BaseIterator:
    """Common scaffolding for the pagination iterators.

    Stores the API method together with the call arguments and implements
    the iterator protocol; subclasses provide ``next``/``prev``.
    """

    def __init__(self, method, *args, **kwargs):
        self.method = method
        self.args = args
        self.kwargs = kwargs
        # unlimited by default; Cursor.pages()/items() may lower this
        self.limit = inf

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        raise NotImplementedError

    def prev(self):
        raise NotImplementedError
class CursorIterator(BaseIterator):
    """Iterator for cursor-based pagination.

    The wrapped method is expected to return ``(data, (prev_cursor,
    next_cursor))``, exactly as consumed by :meth:`next`.
    """

    def __init__(self, method, *args, **kwargs):
        BaseIterator.__init__(self, method, *args, **kwargs)
        start_cursor = self.kwargs.pop('cursor', None)
        self.next_cursor = start_cursor or -1
        self.prev_cursor = start_cursor or 0
        # number of pages fetched so far (compared against self.limit)
        self.num_tweets = 0

    def next(self):
        if self.next_cursor == 0 or self.num_tweets >= self.limit:
            raise StopIteration
        data, cursors = self.method(cursor=self.next_cursor,
                                    *self.args,
                                    **self.kwargs)
        self.prev_cursor, self.next_cursor = cursors
        if len(data) == 0:
            raise StopIteration
        self.num_tweets += 1
        return data

    def prev(self):
        if self.prev_cursor == 0:
            raise TweepyException('Can not page back more, at first page')
        # BUG FIX: the method returns (data, (prev_cursor, next_cursor)),
        # the same shape next() consumes; the old code tried to unpack a
        # flat 3-tuple, which raised ValueError on every call.
        data, cursors = self.method(cursor=self.prev_cursor,
                                    *self.args,
                                    **self.kwargs)
        self.prev_cursor, self.next_cursor = cursors
        self.num_tweets -= 1
        return data
class DMCursorIterator(BaseIterator):
    """Iterator for Direct Message cursor pagination.

    The wrapped method returns either plain data (final page) or a
    ``(data, next_cursor)`` tuple when more pages remain.
    """

    def __init__(self, method, *args, **kwargs):
        BaseIterator.__init__(self, method, *args, **kwargs)
        self.next_cursor = self.kwargs.pop('cursor', None)
        self.page_count = 0

    def next(self):
        if self.next_cursor == -1 or self.page_count >= self.limit:
            raise StopIteration
        result = self.method(
            cursor=self.next_cursor, return_cursors=True,
            *self.args, **self.kwargs
        )
        self.page_count += 1
        if isinstance(result, tuple):
            result, self.next_cursor = result
        else:
            # no cursor came back: that was the final page
            self.next_cursor = -1
        return result

    def prev(self):
        raise TweepyException('This method does not allow backwards pagination')
class IdIterator(BaseIterator):
    """Iterator for ``max_id``/``since_id`` based pagination.

    Pages are fetched raw once, parsed twice (once with ModelParser for the
    ``max_id`` bookkeeping, once with the API's configured parser for the
    caller), and cached so that prev()/next() can replay already-fetched
    pages without extra requests.
    """

    def __init__(self, method, *args, **kwargs):
        BaseIterator.__init__(self, method, *args, **kwargs)
        self.max_id = self.kwargs.pop('max_id', None)
        self.num_tweets = 0
        # parallel caches of previously fetched pages: parser results the
        # caller sees, and their ModelParser counterparts for max_id
        self.results = []
        self.model_results = []
        # position within the cached pages
        self.index = 0

    def next(self):
        """Fetch a set of items with IDs less than current set."""
        if self.num_tweets >= self.limit:
            raise StopIteration

        if self.index >= len(self.results) - 1:
            # at the newest cached page (or nothing cached yet): hit the API
            data = self.method(max_id=self.max_id, parser=RawParser(), *self.args, **self.kwargs)

            # parse once for max_id/since_id tracking...
            model = ModelParser().parse(
                data, api = self.method.__self__,
                payload_list=self.method.payload_list,
                payload_type=self.method.payload_type
            )
            # ...and once with the configured parser for the return value
            result = self.method.__self__.parser.parse(
                data, api = self.method.__self__,
                payload_list=self.method.payload_list,
                payload_type=self.method.payload_type
            )

            if len(self.results) != 0:
                self.index += 1
            self.results.append(result)
            self.model_results.append(model)
        else:
            # replaying a page that was already fetched (after prev())
            self.index += 1
            result = self.results[self.index]
            model = self.model_results[self.index]

        if len(result) == 0:
            raise StopIteration
        # TODO: Make this not dependant on the parser making max_id and
        # since_id available
        self.max_id = model.max_id
        self.num_tweets += 1
        return result

    def prev(self):
        """Fetch a set of items with IDs greater than current set."""
        if self.num_tweets >= self.limit:
            raise StopIteration

        self.index -= 1
        if self.index < 0:
            # There's no way to fetch a set of tweets directly 'above' the
            # current set
            raise StopIteration

        data = self.results[self.index]
        self.max_id = self.model_results[self.index].max_id
        self.num_tweets += 1
        return data
class PageIterator(BaseIterator):
    """Iterator for simple ``page=N`` pagination.

    Keeps the page returned by the previous request so that a repeated page
    -- a known Twitter API quirk, see
    https://twittercommunity.com/t/odd-pagination-behavior-with-get-users-search/148502
    and tweepy issues #958 / #1465 -- ends iteration instead of looping
    forever.
    """

    def __init__(self, method, *args, **kwargs):
        BaseIterator.__init__(self, method, *args, **kwargs)
        self.current_page = 1
        self.previous_items = []

    def next(self):
        if self.current_page > self.limit:
            raise StopIteration

        page = self.method(page=self.current_page, *self.args, **self.kwargs)
        if len(page) == 0:
            raise StopIteration
        if any(item in self.previous_items for item in page):
            # duplicate of the previous page: the API has run out of data
            raise StopIteration

        self.current_page += 1
        self.previous_items = page
        return page

    def prev(self):
        if self.current_page == 1:
            raise TweepyException('Can not page back more, at first page')
        self.current_page -= 1
        return self.method(page=self.current_page, *self.args, **self.kwargs)
class NextIterator(BaseIterator):
    """Iterator for ``next``-token pagination.

    The wrapped method returns either plain data (final page) or a
    ``(data, next_token)`` tuple when more pages remain.
    """

    def __init__(self, method, *args, **kwargs):
        BaseIterator.__init__(self, method, *args, **kwargs)
        self.next_token = self.kwargs.pop('next', None)
        self.page_count = 0

    def next(self):
        if self.next_token == -1 or self.page_count >= self.limit:
            raise StopIteration
        result = self.method(
            next=self.next_token, return_cursors=True,
            *self.args, **self.kwargs
        )
        self.page_count += 1
        if isinstance(result, tuple):
            result, self.next_token = result
        else:
            # no token came back: that was the final page
            self.next_token = -1
        return result

    def prev(self):
        raise TweepyException('This method does not allow backwards pagination')
class ItemIterator(BaseIterator):
    """Yields individual items by driving a page iterator underneath."""

    def __init__(self, page_iterator):
        self.page_iterator = page_iterator
        self.limit = inf
        self.current_page = None
        # index of the last item handed out within current_page
        self.page_index = -1
        self.num_tweets = 0

    def next(self):
        if self.num_tweets >= self.limit:
            raise StopIteration
        if self.current_page is None or self.page_index == len(self.current_page) - 1:
            # Reached end of current page, get the next page...
            self.current_page = next(self.page_iterator)
            while len(self.current_page) == 0:
                # ...skipping over any empty pages
                self.current_page = next(self.page_iterator)
            self.page_index = -1
        self.page_index += 1
        self.num_tweets += 1
        return self.current_page[self.page_index]

    def prev(self):
        if self.current_page is None:
            raise TweepyException('Can not go back more, at first page')
        if self.page_index == 0:
            # At the beginning of the current page, step back to the
            # previous page
            self.current_page = self.page_iterator.prev()
            self.page_index = len(self.current_page)
            if self.page_index == 0:
                raise TweepyException('No more items')
        self.page_index -= 1
        self.num_tweets -= 1
        return self.current_page[self.page_index]

View File

@@ -0,0 +1,158 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
from tweepy.tweet import ReferencedTweet
from tweepy.utils import parse_datetime
#: All the potential fields for :class:`DirectMessageEvent` objects
#: (requested via the ``dm_event.fields`` query parameter)
DIRECT_MESSAGE_EVENT_FIELDS = [
    "attachments",
    "created_at",
    "dm_conversation_id",
    "event_type",
    "id",
    "participant_ids",
    "referenced_tweets",
    "sender_id",
    "text",
]

#: Alias for :const:`DIRECT_MESSAGE_EVENT_FIELDS`
DM_EVENT_FIELDS = DIRECT_MESSAGE_EVENT_FIELDS
class DirectMessageEvent(HashableID, DataMapping):
    """Direct Message (DM) conversations are made up of events. The Twitter API
    v2 currently supports three event types: MessageCreate, ParticipantsJoin,
    and ParticipantsLeave.

    DM event objects are returned by the `Direct Message lookup`_ endpoints,
    and a MessageCreate event is created when Direct Messages are successfully
    created with the `Manage Direct Messages`_ endpoints.

    When requesting DM events, there are three default event object attributes,
    or fields, included: ``id``, ``event_type``, and ``text``. To receive
    additional event `fields`_, use the fields parameter ``dm_event.fields`` to
    select others. Other available event fields include the following:
    ``dm_conversation_id``, ``created_at``, ``sender_id``, ``attachments``,
    ``participant_ids``, and ``referenced_tweets``.

    Several of these fields provide the IDs of other Twitter objects related to
    the Direct Message event:

    * ``sender_id`` - The ID of the account that sent the message, or who
      invited a participant to a group conversation
    * ``participant_ids`` - An array of account IDs. For ParticipantsJoin and
      ParticipantsLeave events this array will contain a single ID of the
      account that created the event
    * ``attachments`` - Provides media IDs for content that has been uploaded
      to Twitter by the sender
    * ``referenced_tweets`` - If a Tweet URL is found in the text field, the ID
      of that Tweet is included in the response

    The ``sender_id``, ``participant_ids``, ``referenced_tweets.id``, and
    ``attachments.media_keys`` `expansions`_ are available to expand on these
    Twitter object IDs.

    .. versionadded:: 4.12

    Attributes
    ----------
    data : dict
        The JSON data representing the Direct Message event.
    id : int
        The unique identifier of the event.
    event_type : str
        Describes the type of event. Three types are currently supported:

        * MessageCreate
        * ParticipantsJoin
        * ParticipantsLeave
    text : str | None
        The actual UTF-8 text of the Direct Message.
    sender_id : int | None
        ID of the User creating the event. To expand this object in the
        response, include ``sender_id`` as an expansion and use the
        ``user.fields`` query parameter to specify User object attributes of
        interest.
    participant_ids : list[int] | None
        IDs of the participants joining and leaving a group conversation. Also
        used when creating new group conversations. To expand this object in
        the response, include ``participant_ids`` as an expansion and use the
        ``user.fields`` query parameter to specify User object attributes of
        interest.
    dm_conversation_id : str | None
        The unique identifier of the conversation the event is a part of.
    created_at : datetime.datetime | None
        Creation time (UTC) of the event.
    referenced_tweets : list[ReferencedTweet] | None
        ID for any Tweet mentioned in the Direct Message text. To expand this
        object in the response, include ``referenced_tweets.id`` as an
        expansion and use the ``tweet.fields`` query parameter to specify Tweet
        object attributes of interest.
    attachments : dict | None
        For Direct Messages with attached Media, provides the media key of the
        uploaded content (photo, video, or GIF). To expand this object in the
        response, include ``attachments.media_keys`` as an expansion and use
        the ``media.fields`` query parameter to specify media object attributes
        of interest. Currently, one attachment is supported.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/dm-events

    .. _Direct Message lookup: https://developer.twitter.com/en/docs/twitter-api/direct-messages/lookup/introduction
    .. _Manage Direct Messages: https://developer.twitter.com/en/docs/twitter-api/direct-messages/manage/introduction
    .. _fields: https://developer.twitter.com/en/docs/twitter-api/fields
    .. _expansions: https://developer.twitter.com/en/docs/twitter-api/expansions
    """

    __slots__ = (
        "data", "id", "event_type", "attachments", "created_at",
        "dm_conversation_id", "participant_ids", "referenced_tweets",
        "sender_id", "text"
    )

    def __init__(self, data):
        self.data = data
        self.id = int(data["id"])
        self.event_type = data["event_type"]

        self.text = data.get("text")

        # IDs arrive as strings in the JSON payload; normalize to int
        self.sender_id = data.get("sender_id")
        if self.sender_id is not None:
            self.sender_id = int(self.sender_id)

        self.participant_ids = data.get("participant_ids")
        if self.participant_ids is not None:
            self.participant_ids = list(map(int, self.participant_ids))

        self.dm_conversation_id = data.get("dm_conversation_id")

        # created_at arrives as an ISO-8601 string; convert to datetime
        self.created_at = data.get("created_at")
        if self.created_at is not None:
            self.created_at = parse_datetime(self.created_at)

        self.referenced_tweets = data.get("referenced_tweets")
        if self.referenced_tweets is not None:
            self.referenced_tweets = [
                ReferencedTweet(referenced_tweet)
                for referenced_tweet in self.referenced_tweets
            ]

        self.attachments = data.get("attachments")

    def __repr__(self):
        representation = (
            f"<Direct Message Event id={self.id} event_type={self.event_type}"
        )
        if self.text is not None:
            representation += f" text={repr(self.text)}"
        representation += '>'
        return representation

    def __str__(self):
        # fall back to repr for events without text (e.g. ParticipantsJoin)
        return self.text or self.__repr__()

View File

@@ -0,0 +1,152 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import requests
class TweepyException(Exception):
    """Base exception for Tweepy

    Every exception raised by this library derives from this class, so
    callers can catch it to handle any Tweepy error.

    .. versionadded:: 4.0
    """
    pass
class HTTPException(TweepyException):
    """HTTPException()

    Exception raised when an HTTP request fails

    .. versionadded:: 4.0

    .. versionchanged:: 4.10
        ``response`` attribute can be an instance of
        :class:`aiohttp.ClientResponse`

    Attributes
    ----------
    response : requests.Response | aiohttp.ClientResponse
        Requests Response from the Twitter API
    api_errors : list[dict[str, int | str]]
        The errors the Twitter API responded with, if any
    api_codes : list[int]
        The error codes the Twitter API responded with, if any
    api_messages : list[str]
        The error messages the Twitter API responded with, if any
    """

    def __init__(self, response, *, response_json=None):
        self.response = response

        self.api_errors = []
        self.api_codes = []
        self.api_messages = []

        try:
            status_code = response.status_code
        except AttributeError:
            # response is an instance of aiohttp.ClientResponse
            status_code = response.status

        if response_json is None:
            try:
                response_json = response.json()
            except requests.JSONDecodeError:
                # body wasn't JSON -- fall back to the bare status line
                super().__init__(f"{status_code} {response.reason}")
                return

        # Copy the list: appending the v1.1-style single "error" below used
        # to mutate the caller's parsed JSON ("errors" list) in place
        errors = list(response_json.get("errors", []))
        # Use := when support for Python 3.7 is dropped
        if "error" in response_json:
            errors.append(response_json["error"])

        error_text = ""

        for error in errors:
            self.api_errors.append(error)
            if isinstance(error, str):
                # some endpoints return bare strings instead of objects
                self.api_messages.append(error)
                error_text += '\n' + error
                continue
            if "code" in error:
                self.api_codes.append(error["code"])
            if "message" in error:
                self.api_messages.append(error["message"])
            if "code" in error and "message" in error:
                error_text += f"\n{error['code']} - {error['message']}"
            elif "message" in error:
                error_text += '\n' + error["message"]

        # Use := when support for Python 3.7 is dropped
        if not error_text and "detail" in response_json:
            # v2 error payloads carry a single "detail" string
            self.api_messages.append(response_json["detail"])
            error_text = '\n' + response_json["detail"]

        super().__init__(
            f"{status_code} {response.reason}{error_text}"
        )
class BadRequest(HTTPException):
    """BadRequest()

    Exception raised for a 400 HTTP status code

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass
class Unauthorized(HTTPException):
    """Unauthorized()

    Exception raised for a 401 HTTP status code

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass
class Forbidden(HTTPException):
    """Forbidden()

    Exception raised for a 403 HTTP status code

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass
class NotFound(HTTPException):
    """NotFound()

    Exception raised for a 404 HTTP status code

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass
class TooManyRequests(HTTPException):
    """TooManyRequests()

    Exception raised for a 429 HTTP status code
    (the request was rate-limited by the API)

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass
class TwitterServerError(HTTPException):
    """TwitterServerError()

    Exception raised for a 5xx HTTP status code

    .. versionadded:: 4.0
    """
    # All error parsing (api_errors/api_codes/api_messages) is inherited
    # from HTTPException.
    pass

View File

@@ -0,0 +1,90 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
from tweepy.utils import parse_datetime
#: All the potential fields for :class:`List` objects
#: (requested via the ``list.fields`` query parameter)
LIST_FIELDS = [
    "created_at",
    "description",
    "follower_count",
    "id",
    "member_count",
    "name",
    "owner_id",
    "private",
]
class List(HashableID, DataMapping):
    """The list object contains `Twitter Lists`_ metadata describing the
    referenced List. The List object is the primary object returned in the List
    lookup endpoint. When requesting additional List fields on this endpoint,
    simply use the fields parameter ``list.fields``.

    At the moment, the List object cannot be found as a child object from any
    other data object. However, user objects can be found and expanded in the
    user resource. These objects are available for expansion by adding
    ``owner_id`` to the ``expansions`` query parameter. Use the expansion with
    the field parameter: ``list.fields`` when requesting additional fields to
    complete the primary List object and ``user.fields`` to complete the
    expansion object.

    .. versionadded:: 4.4

    Attributes
    ----------
    data : dict
        The JSON data representing the List.
    id : str
        The unique identifier of this List.
    name : str
        The name of the List, as defined when creating the List.
    created_at : datetime.datetime | None
        The UTC datetime that the List was created on Twitter.
    description : str | None
        A brief description to let users know about the List.
    follower_count : int | None
        Shows how many users follow this List.
    member_count : int | None
        Shows how many members are part of this List.
    private : bool | None
        Indicates if the List is private.
    owner_id : str | None
        Unique identifier of this List's owner.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/lists

    .. _Twitter Lists: https://help.twitter.com/en/using-twitter/twitter-lists
    """

    __slots__ = (
        "data", "id", "name", "created_at", "description", "follower_count",
        "member_count", "private", "owner_id"
    )

    def __init__(self, data):
        self.data = data
        self.id = data["id"]
        self.name = data["name"]

        # created_at arrives as an ISO-8601 string; convert to datetime
        self.created_at = data.get("created_at")
        if self.created_at is not None:
            self.created_at = parse_datetime(self.created_at)

        self.description = data.get("description")
        self.follower_count = data.get("follower_count")
        self.member_count = data.get("member_count")
        self.private = data.get("private")
        self.owner_id = data.get("owner_id")

    def __repr__(self):
        return f"<List id={self.id} name={self.name}>"

    def __str__(self):
        return self.name

View File

@@ -0,0 +1,127 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping
#: All the potential fields for :class:`Media` objects
#: (requested via the ``media.fields`` query parameter)
MEDIA_FIELDS = [
    "alt_text",
    "duration_ms",
    "height",
    "media_key",
    "non_public_metrics",
    "organic_metrics",
    "preview_image_url",
    "promoted_metrics",
    "public_metrics",
    "type",
    "url",
    "variants",
    "width",
]
class Media(DataMapping):
    """Media refers to any image, GIF, or video attached to a Tweet. The media
    object is not a primary object on any endpoint, but can be found and
    expanded in the Tweet object.

    The object is available for expansion with
    ``?expansions=attachments.media_keys`` to get the condensed object with
    only default fields. Use the expansion with the field parameter:
    ``media.fields`` when requesting additional fields to complete the object.

    .. versionadded:: 4.0

    .. versionchanged:: 4.5
        Added ``url`` field

    .. versionchanged:: 4.12
        Added ``variants`` field

    Attributes
    ----------
    data : dict
        The JSON data representing the media.
    media_key : str
        Unique identifier of the expanded media content.
    type : str
        Type of content (animated_gif, photo, video).
    url : str | None
        A direct URL to the media file on Twitter.
    duration_ms : int | None
        Available when type is video. Duration in milliseconds of the video.
    height : int | None
        Height of this content in pixels.
    non_public_metrics : dict | None
        Non-public engagement metrics for the media content at the time of the
        request.

        Requires user context authentication.
    organic_metrics : dict | None
        Engagement metrics for the media content, tracked in an organic
        context, at the time of the request.

        Requires user context authentication.
    preview_image_url : str | None
        URL to the static placeholder preview of this content.
    promoted_metrics : dict | None
        Engagement metrics for the media content, tracked in a promoted
        context, at the time of the request.

        Requires user context authentication.
    public_metrics : dict | None
        Public engagement metrics for the media content at the time of the
        request.
    width : int | None
        Width of this content in pixels.
    alt_text : str | None
        A description of an image to enable and support accessibility. Can be
        up to 1000 characters long. Alt text can only be added to images at the
        moment.
    variants : list[dict] | None
        Each media object may have multiple display or playback variants,
        with different resolutions or formats

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/media
    """

    __slots__ = (
        "data", "media_key", "url", "type", "duration_ms", "height",
        "non_public_metrics", "organic_metrics", "preview_image_url",
        "promoted_metrics", "public_metrics", "width", "alt_text",
        "variants"
    )

    def __init__(self, data):
        self.data = data
        self.media_key = data["media_key"]
        self.type = data["type"]

        self.url = data.get("url")
        self.duration_ms = data.get("duration_ms")
        self.height = data.get("height")
        self.non_public_metrics = data.get("non_public_metrics")
        self.organic_metrics = data.get("organic_metrics")
        self.preview_image_url = data.get("preview_image_url")
        self.promoted_metrics = data.get("promoted_metrics")
        self.public_metrics = data.get("public_metrics")
        self.width = data.get("width")
        self.alt_text = data.get("alt_text")
        self.variants = data.get("variants")

    def __eq__(self, other):
        # media objects have no numeric id; equality/hashing use media_key
        if isinstance(other, self.__class__):
            return self.media_key == other.media_key
        return NotImplemented

    def __hash__(self):
        return hash(self.media_key)

    def __repr__(self):
        return f"<Media media_key={self.media_key} type={self.type}>"

View File

@@ -0,0 +1,47 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from collections.abc import Mapping
class EqualityComparableID:
    """Mixin providing ID-based equality for objects exposing an ``id``.

    Instances of the exact same class compare equal when their ``id``
    attributes are equal; comparisons against other types are delegated
    back to Python via ``NotImplemented``.
    """

    __slots__ = ()

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.id == other.id
class HashableID(EqualityComparableID):
    __slots__ = ()

    def __hash__(self):
        """Hash by ID.

        ``__hash__`` must return an int; some IDs are strings (e.g. v2 List
        IDs), so hash the ID instead of returning it directly -- the old
        ``return self.id`` raised TypeError for any non-int ID.
        """
        return hash(self.id)
class DataMapping(Mapping):
    """Mixin implementing the ``Mapping`` protocol over ``self.data``.

    Fields of the underlying JSON dict are reachable both as attributes
    (``obj.name``) and as items (``obj["name"]``).
    """

    __slots__ = ()

    def __len__(self):
        return len(self.data)

    def __iter__(self):
        return iter(self.data)

    def __contains__(self, item):
        return item in self.data

    def __getitem__(self, key):
        try:
            return getattr(self, key)
        except AttributeError:
            # item access must surface the Mapping-appropriate error
            raise KeyError from None

    def __getattr__(self, name):
        try:
            return self.data[name]
        except KeyError:
            # attribute access must raise AttributeError, not KeyError
            raise AttributeError from None

View File

@@ -0,0 +1,473 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from email.utils import parsedate_to_datetime
from tweepy.mixins import HashableID
class Model:
    """Base class for the v1.1 API models.

    Holds a reference to the :class:`API` instance that produced the model
    so instance helper methods can issue follow-up requests.
    """

    def __init__(self, api=None):
        self._api = api

    def __getstate__(self):
        # don't pickle the API reference; it carries live auth/session state
        state = self.__dict__.copy()
        state.pop('_api', None)
        return state

    def __repr__(self):
        attrs = ', '.join(
            f'{name}={value!r}' for name, value in vars(self).items()
        )
        return f'{self.__class__.__name__}({attrs})'

    @classmethod
    def parse(cls, api, json):
        """Parse a JSON object into a model instance."""
        raise NotImplementedError

    @classmethod
    def parse_list(cls, api, json_list):
        """Parse a list of JSON objects into a ResultSet of model instances."""
        results = ResultSet()

        if isinstance(json_list, dict):
            if 'id' in json_list:
                # map parameter for statuses/lookup: {'id': {id: obj-or-null}}
                for _id, obj in json_list['id'].items():
                    if obj:
                        results.append(cls.parse(api, obj))
                    else:
                        # unavailable status: keep a bare-ID placeholder
                        results.append(cls.parse(api, {'id': int(_id)}))
                return results
            if 'results' in json_list:
                # premium search wraps the list in a 'results' key
                json_list = json_list['results']

        for obj in json_list:
            if obj:
                results.append(cls.parse(api, obj))
        return results
class ResultSet(list):
    """A list like object that holds results from a Twitter API query and
    tracks the ``max_id``/``since_id`` pagination markers."""

    def __init__(self, max_id=None, since_id=None):
        super().__init__()
        self._max_id = max_id
        self._since_id = since_id

    def ids(self):
        """IDs of every contained item that exposes an ``id`` attribute."""
        return [item.id for item in self if hasattr(item, 'id')]

    @property
    def max_id(self):
        if self._max_id:
            return self._max_id
        ids = self.ids()
        if not ids:
            return None
        # max_id is always the *smallest* ID in the set, minus one, so the
        # next request fetches strictly older items
        return min(ids) - 1

    @property
    def since_id(self):
        if self._since_id:
            return self._since_id
        ids = self.ids()
        if not ids:
            return None
        # since_id is always the *greatest* ID seen in the set
        return max(ids)
class BoundingBox(Model):
    @classmethod
    def parse(cls, api, json):
        """Build a BoundingBox whose attributes mirror the JSON keys."""
        box = cls(api)
        if json is not None:
            for key, value in json.items():
                setattr(box, key, value)
        return box

    def origin(self):
        """
        Return longitude, latitude of southwest (bottom, left) corner of
        bounding box, as a tuple.

        This assumes that bounding box is always a rectangle, which
        appears to be the case at present.
        """
        return tuple(self.coordinates[0][0])

    def corner(self):
        """
        Return longitude, latitude of northeast (top, right) corner of
        bounding box, as a tuple.

        This assumes that bounding box is always a rectangle, which
        appears to be the case at present.
        """
        return tuple(self.coordinates[0][2])
class DirectMessage(Model):
    @classmethod
    def parse(cls, api, json):
        """Parse a single DM event payload into a DirectMessage."""
        dm = cls(api)
        # some responses wrap the payload in an "event" envelope
        if "event" in json:
            json = json["event"]
        setattr(dm, '_json', json)
        for key, value in json.items():
            setattr(dm, key, value)
        return dm

    @classmethod
    def parse_list(cls, api, json_list):
        """Parse either a bare list or an ``{'events': [...]}`` payload."""
        if isinstance(json_list, list):
            events = json_list
        else:
            events = json_list['events']
        results = ResultSet()
        for obj in events:
            results.append(cls.parse(api, obj))
        return results

    def delete(self):
        """Delete this Direct Message via the bound API instance."""
        return self._api.delete_direct_message(self.id)
class Friendship(Model):

    @classmethod
    def parse(cls, api, json):
        """Parse a relationship payload into a ``(source, target)`` pair."""
        relationship = json['relationship']

        def build(side):
            # Each side of the relationship becomes its own model
            model = cls(api)
            setattr(model, '_json', side)
            for key, value in side.items():
                setattr(model, key, value)
            return model

        return build(relationship['source']), build(relationship['target'])
class List(Model):

    @classmethod
    def parse(cls, api, json):
        """Parse a list JSON payload into a model instance.

        Instantiates via ``cls`` instead of the hard-coded ``List`` class
        so that custom models registered on a ``ModelFactory`` subclass
        are honored, consistent with the other models' ``parse``
        implementations in this module.
        """
        lst = cls(api)
        setattr(lst, '_json', json)
        for k, v in json.items():
            if k == 'user':
                setattr(lst, k, User.parse(api, v))
            elif k == 'created_at':
                setattr(lst, k, parsedate_to_datetime(v))
            else:
                setattr(lst, k, v)
        return lst

    @classmethod
    def parse_list(cls, api, json_list, result_set=None):
        """Parse a JSON payload into a ResultSet of List models."""
        results = ResultSet()
        if isinstance(json_list, dict):
            # Some endpoints nest the payload under a "lists" key
            json_list = json_list['lists']
        for obj in json_list:
            results.append(cls.parse(api, obj))
        return results

    def update(self, **kwargs):
        """Update this list's metadata via the bound API instance."""
        return self._api.update_list(list_id=self.id, **kwargs)

    def destroy(self):
        """Delete this list."""
        return self._api.destroy_list(list_id=self.id)

    def timeline(self, **kwargs):
        """Return the timeline of statuses for this list."""
        return self._api.list_timeline(list_id=self.id, **kwargs)

    def add_member(self, id):
        """Add the user with the given ID to this list."""
        return self._api.add_list_member(list_id=self.id, user_id=id)

    def remove_member(self, id):
        """Remove the user with the given ID from this list."""
        return self._api.remove_list_member(list_id=self.id, user_id=id)

    def members(self, **kwargs):
        """Return the members of this list."""
        return self._api.get_list_members(list_id=self.id, **kwargs)

    def subscribe(self):
        """Subscribe the authenticated user to this list."""
        return self._api.subscribe_list(list_id=self.id)

    def unsubscribe(self):
        """Unsubscribe the authenticated user from this list."""
        return self._api.unsubscribe_list(list_id=self.id)

    def subscribers(self, **kwargs):
        """Return the subscribers of this list."""
        return self._api.get_list_subscribers(list_id=self.id, **kwargs)
class Media(Model):

    @classmethod
    def parse(cls, api, json):
        """Build a Media model by copying all JSON keys onto the instance."""
        media = cls(api)
        for attribute, value in json.items():
            setattr(media, attribute, value)
        return media
class Place(Model):

    @classmethod
    def parse(cls, api, json):
        """Build a Place model, expanding nested bounding box and places."""
        place = cls(api)
        for key, value in json.items():
            if key == 'bounding_box':
                # bounding_box value may be null (None.)
                # Example: "United States" (id=96683cc9126741d1)
                if value is None:
                    setattr(place, key, None)
                else:
                    setattr(place, key, BoundingBox.parse(api, value))
            elif key == 'contained_within':
                # contained_within is a list of Places.
                setattr(place, key, Place.parse_list(api, value))
            else:
                setattr(place, key, value)
        return place

    @classmethod
    def parse_list(cls, api, json_list):
        # A bare list is used directly; otherwise places are nested
        # under result -> places.
        if isinstance(json_list, list):
            places = json_list
        else:
            places = json_list['result']['places']
        results = ResultSet()
        results.extend(cls.parse(api, obj) for obj in places)
        return results
class Relationship(Model):

    @classmethod
    def parse(cls, api, json):
        """Build a Relationship, expanding the "connections" token list
        into individual boolean attributes.
        """
        result = cls(api)
        for key, value in json.items():
            if key == 'connections':
                # Each token present in the connections list maps to a
                # True flag on the model (order matches the original
                # attribute order).
                flags = {
                    'is_following': 'following',
                    'is_followed_by': 'followed_by',
                    'is_muted': 'muting',
                    'is_blocked': 'blocking',
                    'is_following_requested': 'following_requested',
                    'no_relationship': 'none',
                }
                for attribute, token in flags.items():
                    setattr(result, attribute, token in value)
            else:
                setattr(result, key, value)
        return result
class SavedSearch(Model):

    @classmethod
    def parse(cls, api, json):
        """Build a SavedSearch, converting ``created_at`` to a datetime."""
        saved_search = cls(api)
        for key, value in json.items():
            if key == 'created_at':
                value = parsedate_to_datetime(value)
            setattr(saved_search, key, value)
        return saved_search

    def destroy(self):
        """Delete this saved search via the bound API instance."""
        return self._api.destroy_saved_search(self.id)
class SearchResults(ResultSet):

    @classmethod
    def parse(cls, api, json):
        """Build a SearchResults set from a v1.1 search payload."""
        metadata = json['search_metadata']
        results = SearchResults()
        # Copy the pagination/search metadata onto the result set
        for field in ('refresh_url', 'completed_in', 'query', 'count',
                      'next_results'):
            setattr(results, field, metadata.get(field))
        # Prefer a custom status model when the parser's factory has one
        try:
            status_model = api.parser.model_factory.status
        except AttributeError:
            status_model = Status
        results.extend(
            status_model.parse(api, status) for status in json['statuses']
        )
        return results
class Status(Model, HashableID):
    """A tweet, as returned by the Twitter v1.1 REST API."""

    @classmethod
    def parse(cls, api, json):
        """Parse a status JSON payload into a Status model.

        Nested entities (user, retweeted/quoted status, place) are parsed
        into their own models, ``created_at`` is converted to a datetime,
        and the HTML ``source`` anchor is split into ``source`` (label)
        and ``source_url``.
        """
        status = cls(api)
        setattr(status, '_json', json)
        for k, v in json.items():
            if k == 'user':
                # Prefer a custom user model from the parser's factory,
                # falling back to the default User model
                try:
                    user = api.parser.model_factory.user.parse(api, v)
                except AttributeError:
                    user = User.parse(api, v)
                setattr(status, 'author', user)
                setattr(status, 'user', user)  # DEPRECIATED
            elif k == 'created_at':
                setattr(status, k, parsedate_to_datetime(v))
            elif k == 'source':
                if '<' in v:
                    # At this point, v should be of the format:
                    # <a href="{source_url}" rel="nofollow">{source}</a>
                    setattr(status, k, v[v.find('>') + 1:v.rfind('<')])
                    # Extract the href value between the first pair of quotes
                    start = v.find('"') + 1
                    end = v.find('"', start)
                    setattr(status, 'source_url', v[start:end])
                else:
                    # Plain-text source with no anchor markup
                    setattr(status, k, v)
                    setattr(status, 'source_url', None)
            elif k == 'retweeted_status':
                setattr(status, k, Status.parse(api, v))
            elif k == 'quoted_status':
                setattr(status, k, Status.parse(api, v))
            elif k == 'place':
                if v is not None:
                    setattr(status, k, Place.parse(api, v))
                else:
                    setattr(status, k, None)
            else:
                setattr(status, k, v)
        return status

    def destroy(self):
        """Delete this status via the bound API instance."""
        return self._api.destroy_status(self.id)

    def retweet(self):
        """Retweet this status as the authenticated user."""
        return self._api.retweet(self.id)

    def retweets(self):
        """Return the retweets of this status."""
        return self._api.get_retweets(self.id)

    def favorite(self):
        """Favorite (like) this status as the authenticated user."""
        return self._api.create_favorite(self.id)
class User(Model, HashableID):

    @classmethod
    def parse(cls, api, json):
        """Build a User model from its JSON representation."""
        user = cls(api)
        setattr(user, '_json', json)
        for key, value in json.items():
            if key == 'created_at':
                setattr(user, key, parsedate_to_datetime(value))
            elif key == 'status':
                setattr(user, key, Status.parse(api, value))
            elif key == 'following':
                # Twitter sends null instead of false here, so normalize
                # the value to a plain boolean
                setattr(user, key, value is True)
            else:
                setattr(user, key, value)
        return user

    @classmethod
    def parse_list(cls, api, json_list):
        # A bare list is used directly; otherwise users are nested under
        # a "users" key.
        if isinstance(json_list, list):
            users = json_list
        else:
            users = json_list['users']
        results = ResultSet()
        results.extend(cls.parse(api, obj) for obj in users)
        return results

    def timeline(self, **kwargs):
        """Return this user's timeline."""
        return self._api.user_timeline(user_id=self.id, **kwargs)

    def friends(self, **kwargs):
        """Return the users this user follows."""
        return self._api.get_friends(user_id=self.id, **kwargs)

    def followers(self, **kwargs):
        """Return this user's followers."""
        return self._api.get_followers(user_id=self.id, **kwargs)

    def follow(self):
        """Follow this user as the authenticated user."""
        self._api.create_friendship(user_id=self.id)
        self.following = True

    def unfollow(self):
        """Unfollow this user as the authenticated user."""
        self._api.destroy_friendship(user_id=self.id)
        self.following = False

    def list_memberships(self, *args, **kwargs):
        """Return the lists this user has been added to."""
        return self._api.get_list_memberships(user_id=self.id, *args, **kwargs)

    def list_ownerships(self, *args, **kwargs):
        """Return the lists owned by this user."""
        return self._api.get_list_ownerships(user_id=self.id, *args, **kwargs)

    def list_subscriptions(self, *args, **kwargs):
        """Return the lists this user subscribes to."""
        return self._api.get_list_subscriptions(
            user_id=self.id, *args, **kwargs
        )

    def lists(self, *args, **kwargs):
        """Return this user's lists."""
        return self._api.get_lists(user_id=self.id, *args, **kwargs)

    def follower_ids(self, *args, **kwargs):
        """Return the IDs of this user's followers."""
        return self._api.get_follower_ids(user_id=self.id, *args, **kwargs)
class IDModel(Model):

    @classmethod
    def parse(cls, api, json):
        """Return the raw list of IDs from the payload, unwrapping the
        "ids" key when the payload is a cursored dict.
        """
        return json if isinstance(json, list) else json['ids']
class JSONModel(Model):

    @classmethod
    def parse(cls, api, json):
        """Pass the deserialized JSON payload through unchanged."""
        return json
class ModelFactory:
    """
    Used by parsers for creating instances
    of models. You may subclass this factory
    to add your own extended models.
    """

    bounding_box = BoundingBox
    direct_message = DirectMessage
    friendship = Friendship
    list = List
    media = Media
    place = Place
    relationship = Relationship
    saved_search = SavedSearch
    search_results = SearchResults
    status = Status
    user = User

    # Pseudo-models for payloads that are not full Twitter objects
    ids = IDModel
    json = JSONModel

View File

@@ -0,0 +1,144 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from math import inf
import requests
from tweepy.client import Response
class Paginator:
    """Paginator( \
        self, method, *args, limit=inf, pagination_token=None, **kwargs \
    )

    Iterate over the pages returned by any :class:`Client` method that
    supports pagination.

    .. note::

        When the passed method returns :class:`requests.Response`, each
        response is deserialized in order to read the pagination tokens,
        likely negating any potential performance benefit of that return
        type.

    .. versionadded:: 4.0

    Parameters
    ----------
    method
        :class:`Client` method to paginate for
    args
        Positional arguments to pass to ``method``
    limit
        Maximum number of requests to make to the API
    pagination_token
        Pagination token to start pagination with
    kwargs
        Keyword arguments to pass to ``method``
    """

    def __init__(self, method, *args, **kwargs):
        self.method = method
        self.args = args
        self.kwargs = kwargs

    def __iter__(self):
        return PaginationIterator(self.method, *self.args, **self.kwargs)

    def __reversed__(self):
        return PaginationIterator(
            self.method, *self.args, reverse=True, **self.kwargs
        )

    def flatten(self, limit=inf):
        """Yield individual results across pages, up to ``limit`` items.

        Parameters
        ----------
        limit
            Maximum number of results to yield
        """
        if limit <= 0:
            return

        yielded = 0
        for response in PaginationIterator(
            self.method, *self.args, **self.kwargs
        ):
            if isinstance(response, Response):
                page_data = response.data or []
            elif isinstance(response, dict):
                page_data = response.get("data", [])
            else:
                raise RuntimeError(
                    f"Paginator.flatten does not support the {type(response)} "
                    f"return type for {self.method.__qualname__}"
                )
            for item in page_data:
                yield item
                yielded += 1
                if yielded == limit:
                    return
class PaginationIterator:
    """Iterator driving :class:`Paginator`: issues one API request per
    ``__next__`` call and tracks the pagination tokens from each
    response's metadata.
    """

    def __init__(self, method, *args, limit=inf, pagination_token=None,
                 reverse=False, **kwargs):
        self.method = method
        self.args = args
        self.limit = limit
        self.kwargs = kwargs
        self.reverse = reverse

        # Seed the token for the direction being walked
        if reverse:
            self.previous_token = pagination_token
            self.next_token = None
        else:
            self.previous_token = None
            self.next_token = pagination_token

        self.count = 0

    def __iter__(self):
        return self

    def __next__(self):
        token = self.previous_token if self.reverse else self.next_token

        # Stop at the request limit, or once a prior response reported no
        # further page (token is None after at least one request)
        if self.count >= self.limit or (self.count and token is None):
            raise StopIteration

        # https://twittercommunity.com/t/why-does-timeline-use-pagination-token-while-search-uses-next-token/150963
        if self.method.__name__ in (
            "search_all_tweets", "search_recent_tweets",
            "get_all_tweets_count"
        ):
            self.kwargs["next_token"] = token
        else:
            self.kwargs["pagination_token"] = token

        response = self.method(*self.args, **self.kwargs)

        if isinstance(response, Response):
            meta = response.meta
        elif isinstance(response, dict):
            meta = response.get("meta", {})
        elif isinstance(response, requests.Response):
            meta = response.json().get("meta", {})
        else:
            raise RuntimeError(
                f"Unknown {type(response)} return type for "
                f"{self.method.__qualname__}"
            )

        self.previous_token = meta.get("previous_token")
        self.next_token = meta.get("next_token")
        self.count += 1

        return response

View File

@@ -0,0 +1,92 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import json as json_lib
from tweepy.errors import TweepyException
from tweepy.models import ModelFactory
class Parser:
    """Abstract base class for response payload parsers."""

    def parse(self, payload, *args, **kwargs):
        """
        Parse the response payload and return the result.
        Returns a tuple that contains the result data and the cursors
        (or None if not present).
        """
        # Subclasses must implement this
        raise NotImplementedError
class RawParser(Parser):
    """Parser that returns the response payload untouched."""

    def __init__(self):
        pass

    def parse(self, payload, *args, **kwargs):
        """Return ``payload`` as-is, without any deserialization."""
        return payload
class JSONParser(Parser):
    """Parser that deserializes JSON payloads, optionally extracting
    pagination cursors.
    """

    payload_format = 'json'

    def parse(self, payload, *, return_cursors=False, **kwargs):
        """Deserialize ``payload``.

        With ``return_cursors=True`` and a dict payload carrying cursor
        keys, returns ``(json, cursors)``; otherwise returns the
        deserialized payload alone (or None for an empty payload).
        """
        if not payload:
            return

        try:
            json = json_lib.loads(payload)
        except Exception as e:
            raise TweepyException(f'Failed to parse JSON payload: {e}')

        if return_cursors and isinstance(json, dict):
            if 'next' in json:
                # Premium-search style cursor
                return json, json['next']
            if 'next_cursor' in json:
                if 'previous_cursor' in json:
                    return json, (json['previous_cursor'], json['next_cursor'])
                return json, json['next_cursor']

        return json
class ModelParser(JSONParser):
    """Parser that deserializes JSON and instantiates models via a
    :class:`ModelFactory`.
    """

    def __init__(self, model_factory=None):
        JSONParser.__init__(self)
        self.model_factory = model_factory or ModelFactory

    def parse(self, payload, *, api=None, payload_list=False,
              payload_type=None, return_cursors=False):
        """Parse ``payload`` into model instances of ``payload_type``."""
        if payload_type is None:
            return

        try:
            model = getattr(self.model_factory, payload_type)
        except AttributeError:
            raise TweepyException(
                f'No model for this payload type: {payload_type}'
            )

        json = JSONParser.parse(self, payload, return_cursors=return_cursors)
        if isinstance(json, tuple):
            json, cursors = json
        else:
            cursors = None

        try:
            if payload_list:
                result = model.parse_list(api, json)
            else:
                result = model.parse(api, json)
        except KeyError:
            raise TweepyException(
                f"Unable to parse response payload: {json}"
            ) from None

        return (result, cursors) if cursors else result

View File

@@ -0,0 +1,82 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
#: All the potential fields for :class:`Place` objects,
#: requestable via the ``place.fields`` query parameter
PLACE_FIELDS = [
    "contained_within",
    "country",
    "country_code",
    "full_name",
    "geo",
    "id",
    "name",
    "place_type",
]
class Place(HashableID, DataMapping):
    """A place tagged in a Tweet (Twitter API v2).

    Not a primary object on any endpoint; it is available for expansion
    with ``?expansions=geo.place_id`` and can be completed with the
    ``place.fields`` request parameter.

    .. versionadded:: 4.0

    Attributes
    ----------
    data : dict
        The JSON data representing the place.
    full_name : str
        A longer-form detailed place name.
    id : str
        The unique identifier of the expanded place.
    contained_within : list
        Identifiers of known places that contain the referenced place.
    country : str | None
        The full-length name of the country this place belongs to.
    country_code : str | None
        The ISO Alpha-2 country code this place belongs to.
    geo : dict | None
        Place details in GeoJSON format.
    name : str | None
        The short name of this place.
    place_type : str | None
        The particular type of information represented by this place,
        such as a city name or a point of interest.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/place
    """

    __slots__ = (
        "data", "full_name", "id", "contained_within", "country",
        "country_code", "geo", "name", "place_type"
    )

    def __init__(self, data):
        self.data = data
        # Required fields
        self.full_name = data["full_name"]
        self.id = data["id"]
        # Optional fields default to None (or an empty list)
        self.contained_within = data.get("contained_within", [])
        for field in ("country", "country_code", "geo", "name",
                      "place_type"):
            setattr(self, field, data.get(field))

    def __repr__(self):
        return f"<Place id={self.id} full_name={self.full_name}>"

    def __str__(self):
        return self.full_name

View File

@@ -0,0 +1,76 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
from tweepy.utils import parse_datetime
#: All the potential fields for :class:`Poll` objects,
#: requestable via the ``poll.fields`` query parameter
POLL_FIELDS = [
    "duration_minutes",
    "end_datetime",
    "id",
    "options",
    "voting_status",
]
class Poll(HashableID, DataMapping):
    """A poll attached to a Tweet (Twitter API v2).

    Not a primary object on any endpoint; it is available for expansion
    with ``?expansions=attachments.poll_ids`` and can be completed with
    the ``poll.fields`` request parameter.

    .. versionadded:: 4.0

    Attributes
    ----------
    data : dict
        The JSON data representing the poll.
    id : str
        Unique identifier of the expanded poll.
    options : list
        Objects describing each choice in the referenced poll.
    duration_minutes : int | None
        Total duration of this poll.
    end_datetime : datetime.datetime | None
        End date and time for this poll.
    voting_status : str | None
        Whether this poll is still active and can receive votes, or if
        the voting is now closed.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/poll
    """

    __slots__ = (
        "data", "id", "options", "duration_minutes", "end_datetime",
        "voting_status"
    )

    def __init__(self, data):
        self.data = data
        self.id = data["id"]
        self.options = data["options"]
        self.duration_minutes = data.get("duration_minutes")
        end = data.get("end_datetime")
        self.end_datetime = None if end is None else parse_datetime(end)
        self.voting_status = data.get("voting_status")

    def __iter__(self):
        # Iterating a poll iterates its options
        return iter(self.options)

    def __len__(self):
        return len(self.options)

    def __repr__(self):
        return f"<Poll id={self.id} options={self.options}>"

View File

@@ -0,0 +1,167 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
from tweepy.utils import parse_datetime
#: All the potential publically-available fields for :class:`Space` objects
PUBLIC_SPACE_FIELDS = [
    "created_at",
    "creator_id",
    "ended_at",
    "host_ids",
    "id",
    "invited_user_ids",
    "is_ticketed",
    "lang",
    "participant_count",
    "scheduled_start",
    "speaker_ids",
    "started_at",
    "state",
    "title",
    "topic_ids",
    "updated_at",
]

#: All the potential fields for :class:`Space` objects
#: (the public fields plus ``subscriber_count``)
SPACE_FIELDS = PUBLIC_SPACE_FIELDS + [
    "subscriber_count",
]
class Space(HashableID, DataMapping):
    """A Twitter Space: a live audio conversation (Twitter API v2).

    The Space data dictionary contains relevant metadata about a Space;
    all the details are updated in real time. Spaces are ephemeral and
    become unavailable after they end or when they are canceled by their
    creator, so consumers are responsible for serving up-to-date data and
    removing data that is no longer available from the platform.

    User objects can be expanded by adding at least one of ``host_ids``,
    ``creator_id``, ``speaker_ids``, ``mentioned_user_ids`` to the
    ``expansions`` query parameter.

    .. versionadded:: 4.1

    .. versionchanged:: 4.4
        Added ``ended_at`` and ``topic_ids`` fields

    .. versionchanged:: 4.6
        Added ``subscriber_count`` field

    .. versionchanged:: 4.14
        Added ``creator_id`` field

    Attributes
    ----------
    data : dict
        The JSON data representing the Space.
    id : str
        The unique identifier of the requested Space.
    state : str
        Indicates if the Space has started, will start in the future, or
        has ended.
    created_at : datetime.datetime | None
        Creation time of this Space.
    ended_at : datetime.datetime | None
        Time when the Space was ended. Only available for ended Spaces.
    host_ids : list
        The unique identifiers of the users hosting this Space.
    lang : str | None
        Language of the Space, if detected, as a BCP47 language tag.
    is_ticketed : bool | None
        Indicates if this is a ticketed Space.
    invited_user_ids : list
        IDs of the users invited to join as speakers.
    participant_count : int | None
        Current number of users in the Space, including hosts and
        speakers.
    subscriber_count : int | None
        The number of people who set a reminder for the Space.
    scheduled_start : datetime.datetime | None
        Start time of a scheduled Space; only returned when the Space was
        scheduled.
    speaker_ids : list
        IDs of the users who spoke at any point during the Space.
    started_at : datetime.datetime | None
        The actual start time of the Space.
    title : str | None
        The title of the Space as specified by the creator.
    topic_ids : list
        IDs of the topics selected by the creator of the Space.
    updated_at : datetime.datetime | None
        Date and time of the last update to any of the Space's metadata.
    creator_id : int | None
        ID of the Space's creator.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/space
    """

    __slots__ = (
        "data", "id", "state", "created_at", "ended_at", "host_ids", "lang",
        "is_ticketed", "invited_user_ids", "participant_count",
        "subscriber_count", "scheduled_start", "speaker_ids", "started_at",
        "title", "topic_ids", "updated_at", "creator_id"
    )

    def __init__(self, data):
        self.data = data
        self.id = data["id"]
        self.state = data["state"]

        # Optional timestamps, parsed into datetimes when present
        for field in ("created_at", "ended_at", "scheduled_start",
                      "started_at", "updated_at"):
            value = data.get(field)
            if value is not None:
                value = parse_datetime(value)
            setattr(self, field, value)

        self.host_ids = data.get("host_ids", [])
        self.lang = data.get("lang")
        self.is_ticketed = data.get("is_ticketed")
        self.invited_user_ids = data.get("invited_user_ids", [])
        self.participant_count = data.get("participant_count")
        self.subscriber_count = data.get("subscriber_count")
        self.speaker_ids = data.get("speaker_ids", [])
        self.title = data.get("title")
        self.topic_ids = data.get("topic_ids", [])

        self.creator_id = data.get("creator_id")
        if self.creator_id is not None:
            self.creator_id = int(self.creator_id)

    def __repr__(self):
        return f"<Space id={self.id} state={self.state}>"

View File

@@ -0,0 +1,687 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
# Appengine users: https://developers.google.com/appengine/docs/python/sockets/#making_httplib_use_sockets
from collections import namedtuple
import json
import logging
from math import inf
from platform import python_version
import ssl
import traceback
from threading import Thread
from time import sleep
from typing import NamedTuple
import requests
import urllib3
import tweepy
from tweepy.client import BaseClient, Response
from tweepy.errors import TweepyException
from tweepy.tweet import Tweet
log = logging.getLogger(__name__)
# Parsed payload of one streamed message: the Tweet data itself plus any
# requested expansions ("includes"), partial errors, and the filter rules
# the Tweet matched.
StreamResponse = namedtuple(
    "StreamResponse", ("data", "includes", "errors", "matching_rules")
)
class BaseStream:
    """Shared connection/retry machinery for streaming clients: opens the
    streaming HTTP request, dispatches received lines to the ``on_*``
    callbacks, and reconnects with backoff on HTTP and network errors.
    """

    def __init__(self, *, chunk_size=512, daemon=False, max_retries=inf,
                 proxy=None, verify=True):
        # chunk_size: socket read size passed to Response.iter_lines
        # daemon: whether the background stream thread is a daemon thread
        # max_retries: number of consecutive errors tolerated before
        #     giving up (inf by default)
        # proxy: optional HTTPS proxy URL
        # verify: TLS verification flag or CA bundle path
        self.chunk_size = chunk_size
        self.daemon = daemon
        self.max_retries = max_retries
        self.proxies = {"https": proxy} if proxy else {}
        self.verify = verify

        self.running = False
        self.session = requests.Session()
        self.thread = None
        self.user_agent = (
            f"Python/{python_version()} "
            f"Requests/{requests.__version__} "
            f"Tweepy/{tweepy.__version__}"
        )

    def _connect(
        self, method, url, auth=None, params=None, headers=None, body=None,
        timeout=21
    ):
        """Open the streaming connection and run the receive loop.

        Reconnects with escalating backoff on HTTP errors (doubling wait,
        capped; 420/429 start at a longer wait) and linear backoff on
        network errors, until ``disconnect()`` is called or
        ``max_retries`` consecutive errors occur.
        """
        self.running = True

        error_count = 0
        # https://developer.twitter.com/en/docs/twitter-api/v1/tweets/filter-realtime/guides/connecting
        # https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/handling-disconnections
        # https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/integrate/handling-disconnections
        network_error_wait = 0
        network_error_wait_step = 0.25
        network_error_wait_max = 16
        http_error_wait = http_error_wait_start = 5
        http_error_wait_max = 320
        http_429_error_wait_start = 60

        self.session.headers["User-Agent"] = self.user_agent

        try:
            while self.running and error_count <= self.max_retries:
                try:
                    with self.session.request(
                        method, url, params=params, headers=headers, data=body,
                        timeout=timeout, stream=True, auth=auth,
                        verify=self.verify, proxies=self.proxies
                    ) as resp:
                        if resp.status_code == 200:
                            # Successful connection: reset all backoff state
                            error_count = 0
                            http_error_wait = http_error_wait_start
                            network_error_wait = 0

                            self.on_connect()
                            if not self.running:
                                break

                            # Empty lines are keep-alive signals
                            for line in resp.iter_lines(
                                chunk_size=self.chunk_size
                            ):
                                if line:
                                    self.on_data(line)
                                else:
                                    self.on_keep_alive()
                                if not self.running:
                                    break

                            if resp.raw.closed:
                                self.on_closed(resp)
                        else:
                            self.on_request_error(resp.status_code)
                            if not self.running:
                                break

                            # The error text is logged here instead of in
                            # on_request_error to keep on_request_error
                            # backwards-compatible. In a future version, the
                            # Response should be passed to on_request_error.
                            log.error(
                                "HTTP error response text: %s", resp.text
                            )

                            error_count += 1

                            # Rate-limit responses start at a longer wait
                            if resp.status_code in (420, 429):
                                if http_error_wait < http_429_error_wait_start:
                                    http_error_wait = http_429_error_wait_start

                            sleep(http_error_wait)

                            # Exponential backoff, capped at the maximum
                            http_error_wait *= 2
                            if http_error_wait > http_error_wait_max:
                                http_error_wait = http_error_wait_max
                except (requests.ConnectionError, requests.Timeout,
                        requests.exceptions.ChunkedEncodingError,
                        ssl.SSLError, urllib3.exceptions.ReadTimeoutError,
                        urllib3.exceptions.ProtocolError) as exc:
                    # This is still necessary, as a SSLError can actually be
                    # thrown when using Requests
                    # If it's not time out treat it like any other exception
                    if isinstance(exc, ssl.SSLError):
                        if not (exc.args and "timed out" in str(exc.args[0])):
                            raise

                    self.on_connection_error()
                    if not self.running:
                        break

                    # The error text is logged here instead of in
                    # on_connection_error to keep on_connection_error
                    # backwards-compatible. In a future version, the error
                    # should be passed to on_connection_error.
                    log.error(
                        "Connection error: %s",
                        "".join(
                            traceback.format_exception_only(type(exc), exc)
                        ).rstrip()
                    )

                    # Linear backoff, capped at the maximum
                    sleep(network_error_wait)

                    network_error_wait += network_error_wait_step
                    if network_error_wait > network_error_wait_max:
                        network_error_wait = network_error_wait_max
        except Exception as exc:
            self.on_exception(exc)
        finally:
            self.session.close()

            self.running = False
            self.on_disconnect()

    def _threaded_connect(self, *args, **kwargs):
        """Run :meth:`_connect` in a background thread and return the thread."""
        self.thread = Thread(target=self._connect, name="Tweepy Stream",
                             args=args, kwargs=kwargs, daemon=self.daemon)
        self.thread.start()
        return self.thread

    def disconnect(self):
        """Disconnect the stream"""
        self.running = False

    def on_closed(self, response):
        """This is called when the stream has been closed by Twitter.

        Parameters
        ----------
        response : requests.Response
            The Response from Twitter
        """
        log.error("Stream connection closed by Twitter")

    def on_connect(self):
        """This is called after successfully connecting to the streaming API.
        """
        log.info("Stream connected")

    def on_connection_error(self):
        """This is called when the stream connection errors or times out."""
        log.error("Stream connection has errored or timed out")

    def on_disconnect(self):
        """This is called when the stream has disconnected."""
        log.info("Stream disconnected")

    def on_exception(self, exception):
        """This is called when an unhandled exception occurs.

        Parameters
        ----------
        exception : Exception
            The unhandled exception
        """
        log.exception("Stream encountered an exception")

    def on_keep_alive(self):
        """This is called when a keep-alive signal is received."""
        log.debug("Received keep-alive signal")

    def on_request_error(self, status_code):
        """This is called when a non-200 HTTP status code is encountered.

        Parameters
        ----------
        status_code : int
            The HTTP status code encountered
        """
        log.error("Stream encountered HTTP error: %d", status_code)
class StreamingClient(BaseClient, BaseStream):
"""Filter and sample realtime Tweets with Twitter API v2
.. versionadded:: 4.6
Parameters
----------
bearer_token : str
Twitter API Bearer Token
return_type : type[dict | requests.Response | Response]
Type to return from requests to the API
wait_on_rate_limit : bool
Whether or not to wait before retrying when a rate limit is
encountered. This applies to requests besides those that connect to a
stream (see ``max_retries``).
chunk_size : int
The default socket.read size. Default to 512, less than half the size
of a Tweet so that it reads Tweets with the minimal latency of 2 reads
per Tweet. Values higher than ~1kb will increase latency by waiting for
more data to arrive but may also increase throughput by doing fewer
socket read calls.
daemon : bool
Whether or not to use a daemon thread when using a thread to run the
stream
max_retries : int
Max number of times to retry connecting the stream
proxy : str | None
URL of the proxy to use when connecting to the stream
verify : bool | str
Either a boolean, in which case it controls whether to verify the
servers TLS certificate, or a string, in which case it must be a path
to a CA bundle to use.
Attributes
----------
running : bool
Whether there's currently a stream running
session : :class:`requests.Session`
Requests Session used to connect to the stream
thread : :class:`threading.Thread` | None
Thread used to run the stream
user_agent : str
User agent used when connecting to the stream
"""
    def __init__(self, bearer_token, *, return_type=Response,
                 wait_on_rate_limit=False, **kwargs):
        """__init__( \
            bearer_token, *, return_type=Response, wait_on_rate_limit=False, \
            chunk_size=512, daemon=False, max_retries=inf, proxy=None, \
            verify=True \
        )
        """
        # Client configuration goes to BaseClient; the remaining keyword
        # arguments configure the stream connection (see BaseStream)
        BaseClient.__init__(self, bearer_token, return_type=return_type,
                            wait_on_rate_limit=wait_on_rate_limit)
        BaseStream.__init__(self, **kwargs)
    def _connect(self, method, endpoint, **kwargs):
        """Connect to the given v2 streaming endpoint, authenticating with
        the app-only bearer token.
        """
        self.session.headers["Authorization"] = f"Bearer {self.bearer_token}"
        url = f"https://api.twitter.com/2/tweets/{endpoint}/stream"
        super()._connect(method, url, **kwargs)
def _process_data(self, data, data_type=None):
if data_type is StreamRule:
if isinstance(data, list):
rules = []
for rule in data:
if "tag" in rule:
rules.append(StreamRule(
value=rule["value"], id=rule["id"], tag=rule["tag"]
))
else:
rules.append(StreamRule(value=rule["value"],
id=rule["id"]))
return rules
elif data is not None:
if "tag" in data:
return StreamRule(value=data["value"], id=data["id"],
tag=data["tag"])
else:
return StreamRule(value=data["value"], id=data["id"])
else:
return super()._process_data(data, data_type=data_type)
def add_rules(self, add, **params):
"""add_rules(add, *, dry_run)
Add rules to filtered stream.
Parameters
----------
add : list[StreamRule] | StreamRule
Specifies the operation you want to perform on the rules.
dry_run : bool
Set to true to test the syntax of your rule without submitting it.
This is useful if you want to check the syntax of a rule before
removing one or more of your existing rules.
Returns
-------
dict | requests.Response | Response
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules
"""
json = {"add": []}
if isinstance(add, StreamRule):
add = (add,)
for rule in add:
if rule.tag is not None:
json["add"].append({"value": rule.value, "tag": rule.tag})
else:
json["add"].append({"value": rule.value})
return self._make_request(
"POST", f"/2/tweets/search/stream/rules", params=params,
endpoint_parameters=("dry_run",), json=json, data_type=StreamRule
)
def delete_rules(self, ids, **params):
"""delete_rules(ids, *, dry_run)
Delete rules from filtered stream.
Parameters
----------
ids : int | str | list[int | str | StreamRule] | StreamRule
Array of rule IDs, each one representing a rule already active in
your stream. IDs must be submitted as strings.
dry_run : bool
Set to true to test the syntax of your rule without submitting it.
This is useful if you want to check the syntax of a rule before
removing one or more of your existing rules.
Returns
-------
dict | requests.Response | Response
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules
"""
json = {"delete": {"ids": []}}
if isinstance(ids, (int, str, StreamRule)):
ids = (ids,)
for id in ids:
if isinstance(id, StreamRule):
json["delete"]["ids"].append(str(id.id))
else:
json["delete"]["ids"].append(str(id))
return self._make_request(
"POST", f"/2/tweets/search/stream/rules", params=params,
endpoint_parameters=("dry_run",), json=json, data_type=StreamRule
)
def filter(self, *, threaded=False, **params):
"""filter( \
*, backfill_minutes=None, expansions=None, media_fields=None, \
place_fields=None, poll_fields=None, tweet_fields=None, \
user_fields=None, threaded=False \
)
Streams Tweets in real-time based on a specific set of filter rules.
If you are using the academic research product track, you can connect
up to two `redundant connections <filter redundant connections_>`_ to
maximize your streaming up-time.
The Tweets returned by this endpoint count towards the Project-level
`Tweet cap`_.
Parameters
----------
backfill_minutes : int | None
By passing this parameter, you can request up to five (5) minutes
worth of streaming data that you might have missed during a
disconnection to be delivered to you upon reconnection. The
backfilled Tweets will automatically flow through the reconnected
stream, with older Tweets generally being delivered before any
newly matching Tweets. You must include a whole number between 1
and 5 as the value to this parameter.
This feature will deliver duplicate Tweets, meaning that if you
were disconnected for 90 seconds, and you requested two minutes of
backfill, you will receive 30 seconds worth of duplicate Tweets.
Due to this, you should make sure your system is tolerant of
duplicate data.
This feature is currently only available to the Academic Research
product track.
expansions : list[str] | str
:ref:`expansions_parameter`
media_fields : list[str] | str
:ref:`media_fields_parameter`
place_fields : list[str] | str
:ref:`place_fields_parameter`
poll_fields : list[str] | str
:ref:`poll_fields_parameter`
tweet_fields : list[str] | str
:ref:`tweet_fields_parameter`
user_fields : list[str] | str
:ref:`user_fields_parameter`
threaded : bool
Whether or not to use a thread to run the stream
Raises
------
TweepyException
When the stream is already connected
Returns
-------
threading.Thread | None
The thread if ``threaded`` is set to ``True``, else ``None``
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream
.. _filter redundant connections: https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/recovery-and-redundancy-features
.. _Tweet cap: https://developer.twitter.com/en/docs/twitter-api/tweet-caps
"""
if self.running:
raise TweepyException("Stream is already connected")
method = "GET"
endpoint = "search"
params = self._process_params(
params, endpoint_parameters=(
"backfill_minutes", "expansions", "media.fields",
"place.fields", "poll.fields", "tweet.fields", "user.fields"
)
)
if threaded:
return self._threaded_connect(method, endpoint, params=params)
else:
self._connect(method, endpoint, params=params)
def get_rules(self, **params):
"""get_rules(*, ids)
Return a list of rules currently active on the streaming endpoint,
either as a list or individually.
Parameters
----------
ids : list[str] | str
Comma-separated list of rule IDs. If omitted, all rules are
returned.
Returns
-------
dict | requests.Response | Response
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream-rules
"""
return self._make_request(
"GET", f"/2/tweets/search/stream/rules", params=params,
endpoint_parameters=("ids",), data_type=StreamRule
)
def sample(self, *, threaded=False, **params):
"""sample( \
*, backfill_minutes=None, expansions=None, media_fields=None, \
place_fields=None, poll_fields=None, tweet_fields=None, \
user_fields=None, threaded=False \
)
Streams about 1% of all Tweets in real-time.
If you are using the academic research product track, you can connect
up to two `redundant connections <sample redundant connections_>`_ to
maximize your streaming up-time.
Parameters
----------
backfill_minutes : int | None
By passing this parameter, you can request up to five (5) minutes
worth of streaming data that you might have missed during a
disconnection to be delivered to you upon reconnection. The
backfilled Tweets will automatically flow through the reconnected
stream, with older Tweets generally being delivered before any
newly matching Tweets. You must include a whole number between 1
and 5 as the value to this parameter.
This feature will deliver duplicate Tweets, meaning that if you
were disconnected for 90 seconds, and you requested two minutes of
backfill, you will receive 30 seconds worth of duplicate Tweets.
Due to this, you should make sure your system is tolerant of
duplicate data.
This feature is currently only available to the Academic Research
product track.
expansions : list[str] | str
:ref:`expansions_parameter`
media_fields : list[str] | str
:ref:`media_fields_parameter`
place_fields : list[str] | str
:ref:`place_fields_parameter`
poll_fields : list[str] | str
:ref:`poll_fields_parameter`
tweet_fields : list[str] | str
:ref:`tweet_fields_parameter`
user_fields : list[str] | str
:ref:`user_fields_parameter`
threaded : bool
Whether or not to use a thread to run the stream
Raises
------
TweepyException
When the stream is already connected
Returns
-------
threading.Thread | None
The thread if ``threaded`` is set to ``True``, else ``None``
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/api-reference/get-tweets-sample-stream
.. _sample redundant connections: https://developer.twitter.com/en/docs/twitter-api/tweets/volume-streams/integrate/recovery-and-redundancy-features
"""
if self.running:
raise TweepyException("Stream is already connected")
method = "GET"
endpoint = "sample"
params = self._process_params(
params, endpoint_parameters=(
"backfill_minutes", "expansions", "media.fields",
"place.fields", "poll.fields", "tweet.fields", "user.fields"
)
)
if threaded:
return self._threaded_connect(method, endpoint, params=params)
else:
self._connect(method, endpoint, params=params)
def on_data(self, raw_data):
"""This is called when raw data is received from the stream.
This method handles sending the data to other methods.
Parameters
----------
raw_data : JSON
The raw data from the stream
References
----------
https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/consuming-streaming-data
"""
data = json.loads(raw_data)
tweet = None
includes = {}
errors = []
matching_rules = []
if "data" in data:
tweet = Tweet(data["data"])
self.on_tweet(tweet)
if "includes" in data:
includes = self._process_includes(data["includes"])
self.on_includes(includes)
if "errors" in data:
errors = data["errors"]
self.on_errors(errors)
if "matching_rules" in data:
matching_rules = [
StreamRule(id=rule["id"], tag=rule["tag"])
for rule in data["matching_rules"]
]
self.on_matching_rules(matching_rules)
self.on_response(
StreamResponse(tweet, includes, errors, matching_rules)
)
def on_tweet(self, tweet):
"""This is called when a Tweet is received.
Parameters
----------
tweet : Tweet
The Tweet received
"""
pass
def on_includes(self, includes):
"""This is called when includes are received.
Parameters
----------
includes : dict
The includes received
"""
pass
    def on_errors(self, errors):
        """This is called when errors are received.

        Parameters
        ----------
        errors : dict
            The errors received
        """
        # Default behavior is to log at ERROR level; override in a subclass
        # to handle partial errors (e.g. withheld or deleted Tweets).
        log.error("Received errors: %s", errors)
def on_matching_rules(self, matching_rules):
"""This is called when matching rules are received.
Parameters
----------
matching_rules : list[StreamRule]
The matching rules received
"""
pass
    def on_response(self, response):
        """This is called when a response is received.

        Parameters
        ----------
        response : StreamResponse
            The response received
        """
        # Default behavior is to log at DEBUG level; override in a subclass
        # to process the aggregate response in one place.
        log.debug("Received response: %s", response)
class StreamRule(NamedTuple):
    """A rule for the filtered stream.

    .. versionadded:: 4.6

    Parameters
    ----------
    value : str | None
        The rule text. With a `Standard Project`_ at the Basic `access
        level`_, you can use the basic set of `operators`_, submit up to 25
        concurrent rules, and submit rules up to 512 characters long. With
        an `Academic Research Project`_ at the Basic access level, you can
        use all available operators, submit up to 1,000 concurrent rules,
        and submit rules up to 1,024 characters long.
    tag : str | None
        The tag label. This is free-form text you can use to identify which
        rules matched a specific Tweet in the streaming response. Tags can
        be the same across rules.
    id : str | None
        Unique identifier of this rule. This is returned as a string.

    .. _Standard Project: https://developer.twitter.com/en/docs/projects
    .. _access level: https://developer.twitter.com/en/products/twitter-api/early-access/guide#na_1
    .. _operators: https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
    .. _Academic Research Project: https://developer.twitter.com/en/docs/projects
    """

    # All fields default to None so rules can be built from partial data
    # (e.g. matching_rules payloads carry only id and tag).
    value: str = None
    tag: str = None
    id: str = None

View File

@@ -0,0 +1,268 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
import warnings
from tweepy.mixins import DataMapping, HashableID
from tweepy.utils import parse_datetime
#: All the potential publicly-available fields for :class:`Tweet` objects
#: (usable with any authentication context)
PUBLIC_TWEET_FIELDS = [
    "attachments",
    "author_id",
    "context_annotations",
    "conversation_id",
    "created_at",
    "edit_controls",
    "edit_history_tweet_ids",
    "entities",
    "geo",
    "id",
    "in_reply_to_user_id",
    "lang",
    "possibly_sensitive",
    "public_metrics",
    "referenced_tweets",
    "reply_settings",
    "source",
    "text",
    "withheld",
]

#: All the potential fields for :class:`Tweet` objects, including the
#: metrics fields that require user context authentication
TWEET_FIELDS = PUBLIC_TWEET_FIELDS + [
    "non_public_metrics",
    "organic_metrics",
    "promoted_metrics",
]
class Tweet(HashableID, DataMapping):
    """Tweets are the basic building block of all things Twitter. The Tweet
    object has a long list of root-level fields, such as ``id``, ``text``,
    and ``created_at``. Tweet objects are also the parent object to several
    child objects including ``user``, ``media``, ``poll``, and ``place``. Use
    the field parameter ``tweet.fields`` when requesting these root-level
    fields on the Tweet object.

    The Tweet object that can be found and expanded in the user resource.
    Additional Tweets related to the requested Tweet can also be found and
    expanded in the Tweet resource. The object is available for expansion with
    ``?expansions=pinned_tweet_id`` in the user resource or
    ``?expansions=referenced_tweets.id`` in the Tweet resource to get the
    object with only default fields. Use the expansion with the field
    parameter: ``tweet.fields`` when requesting additional fields to complete
    the object.

    .. versionadded:: 4.0

    .. versionchanged:: 4.11
        Added ``edit_history_tweet_ids`` and ``edit_controls`` fields

    Attributes
    ----------
    data : dict
        The JSON data representing the Tweet.
    id : int
        The unique identifier of the requested Tweet.
    text : str
        The actual UTF-8 text of the Tweet. See `twitter-text`_ for details on
        what characters are currently considered valid.
    edit_history_tweet_ids : list[int]
        Unique identifiers indicating all versions of a Tweet. For Tweets with
        no edits, there will be one ID. For Tweets with an edit history, there
        will be multiple IDs, arranged in ascending order reflecting the order
        of edits. The most recent version is the last position of the array.
        Empty if the field is missing from the payload (a ``RuntimeWarning``
        is emitted in that case).
    attachments : dict | None
        Specifies the type of attachments (if any) present in this Tweet.
    author_id : int | None
        The unique identifier of the User who posted this Tweet.
    context_annotations : list
        Contains context annotations for the Tweet.
    conversation_id : int | None
        The Tweet ID of the original Tweet of the conversation (which includes
        direct replies, replies of replies).
    created_at : datetime.datetime | None
        Creation time of the Tweet.
    edit_controls : dict | None
        When present, this indicates how much longer the Tweet can be edited
        and the number of remaining edits. Tweets are only editable for the
        first 30 minutes after creation and can be edited up to five times.
    entities : dict | None
        Entities which have been parsed out of the text of the Tweet.
        Additionally see entities in Twitter Objects.
    geo : dict | None
        Contains details about the location tagged by the user in this Tweet,
        if they specified one.
    in_reply_to_user_id : int | None
        If the represented Tweet is a reply, this field will contain the
        original Tweet's author ID. This will not necessarily always be the
        user directly mentioned in the Tweet.
    lang : str | None
        Language of the Tweet, if detected by Twitter. Returned as a BCP47
        language tag.
    non_public_metrics : dict | None
        Non-public engagement metrics for the Tweet at the time of the request.
        Requires user context authentication.
    organic_metrics : dict | None
        Engagement metrics, tracked in an organic context, for the Tweet at the
        time of the request.
        Requires user context authentication.
    possibly_sensitive : bool | None
        This field only surfaces when a Tweet contains a link. The meaning of
        the field doesn't pertain to the Tweet content itself, but instead it
        is an indicator that the URL contained in the Tweet may contain content
        or media identified as sensitive content.
    promoted_metrics : dict | None
        Engagement metrics, tracked in a promoted context, for the Tweet at the
        time of the request.
        Requires user context authentication.
    public_metrics : dict | None
        Public engagement metrics for the Tweet at the time of the request.
    referenced_tweets : list[ReferencedTweet] | None
        A list of Tweets this Tweet refers to. For example, if the parent Tweet
        is a Retweet, a Retweet with comment (also known as Quoted Tweet) or a
        Reply, it will include the related Tweet referenced to by its parent.
    reply_settings : str | None
        Shows you who can reply to a given Tweet. Fields returned are
        "everyone", "mentioned_users", and "followers".
    source : str | None
        The name of the app the user Tweeted from.

        .. note::

            As of December 20, 2022, this field has been removed from the Tweet
            payload. [#]_
    withheld : dict | None
        When present, contains withholding details for `withheld content`_.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet

    .. _twitter-text: https://github.com/twitter/twitter-text/
    .. _withheld content: https://help.twitter.com/en/rules-and-policies/tweet-withheld-by-country
    """

    __slots__ = (
        "data", "id", "text", "edit_history_tweet_ids", "attachments",
        "author_id", "context_annotations", "conversation_id", "created_at",
        "edit_controls", "entities", "geo", "in_reply_to_user_id", "lang",
        "non_public_metrics", "organic_metrics", "possibly_sensitive",
        "promoted_metrics", "public_metrics", "referenced_tweets",
        "reply_settings", "source", "withheld"
    )

    def __init__(self, data):
        self.data = data
        self.id = int(data["id"])
        self.text = data["text"]
        try:
            self.edit_history_tweet_ids = list(
                map(int, data["edit_history_tweet_ids"])
            )
        except KeyError:
            warnings.warn(
                "Tweet data missing default edit_history_tweet_ids field",
                RuntimeWarning,
                stacklevel=2
            )
            # Fall back to an empty list: this class uses __slots__, so
            # leaving the slot unassigned would make later attribute access
            # raise AttributeError instead of yielding an empty history.
            self.edit_history_tweet_ids = []
        self.attachments = data.get("attachments")
        # ID-like fields arrive as strings in the API payload; normalize
        # them to int when present.
        self.author_id = data.get("author_id")
        if self.author_id is not None:
            self.author_id = int(self.author_id)
        self.context_annotations = data.get("context_annotations", [])
        self.conversation_id = data.get("conversation_id")
        if self.conversation_id is not None:
            self.conversation_id = int(self.conversation_id)
        self.created_at = data.get("created_at")
        if self.created_at is not None:
            self.created_at = parse_datetime(self.created_at)
        self.edit_controls = data.get("edit_controls")
        if self.edit_controls is not None:
            self.edit_controls["edits_remaining"] = int(
                self.edit_controls["edits_remaining"]
            )
            self.edit_controls["editable_until"] = parse_datetime(
                self.edit_controls["editable_until"]
            )
        self.entities = data.get("entities")
        self.geo = data.get("geo")
        self.in_reply_to_user_id = data.get("in_reply_to_user_id")
        if self.in_reply_to_user_id is not None:
            self.in_reply_to_user_id = int(self.in_reply_to_user_id)
        self.lang = data.get("lang")
        self.non_public_metrics = data.get("non_public_metrics")
        self.organic_metrics = data.get("organic_metrics")
        self.possibly_sensitive = data.get("possibly_sensitive")
        self.promoted_metrics = data.get("promoted_metrics")
        self.public_metrics = data.get("public_metrics")
        self.referenced_tweets = data.get("referenced_tweets")
        if self.referenced_tweets is not None:
            # Wrap raw dicts in ReferencedTweet objects for attribute access
            self.referenced_tweets = [
                ReferencedTweet(referenced_tweet)
                for referenced_tweet in self.referenced_tweets
            ]
        self.reply_settings = data.get("reply_settings")
        self.source = data.get("source")
        self.withheld = data.get("withheld")

    def __len__(self):
        # Length of a Tweet is the length of its text
        return len(self.text)

    def __repr__(self):
        return f"<Tweet id={self.id} text={repr(self.text)}>"

    def __str__(self):
        return self.text
class ReferencedTweet(HashableID, DataMapping):
    """A Tweet referenced by another Tweet (e.g. the target of a Retweet,
    Quote Tweet, or Reply).

    .. versionadded:: 4.0

    .. versionchanged:: 4.12
        Changed ``type`` to be optional

    Attributes
    ----------
    data : dict
        The JSON data representing the referenced Tweet.
    id : int
        The unique identifier of the referenced Tweet.
    type : str | None

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
    """

    __slots__ = ("data", "id", "type")

    def __init__(self, data):
        self.data = data
        self.id = int(data["id"])
        # "type" may be absent depending on the requested fields
        self.type = data.get("type")

    def __repr__(self):
        type_info = "" if self.type is None else f" type={self.type}"
        return f"<ReferencedTweet id={self.id}{type_info}>"

View File

@@ -0,0 +1,140 @@
# Tweepy
# Copyright 2009-2023 Joshua Roesslein
# See LICENSE for details.
from tweepy.mixins import DataMapping, HashableID
from tweepy.utils import parse_datetime
#: All the potential fields for :class:`User` objects
#: (pass via the ``user.fields`` request parameter)
USER_FIELDS = [
    "created_at",
    "description",
    "entities",
    "id",
    "location",
    "name",
    "pinned_tweet_id",
    "profile_image_url",
    "protected",
    "public_metrics",
    "url",
    "username",
    "verified",
    "verified_type",
    "withheld",
]
class User(HashableID, DataMapping):
    """The user object contains Twitter user account metadata describing the
    referenced user. It is the primary object returned by the `users lookup`_
    endpoint; request additional user fields there with the ``user.fields``
    parameter.

    The user object can also be found as a child object and expanded in the
    Tweet object. It is available for expansion with ``?expansions=author_id``
    or ``?expansions=in_reply_to_user_id`` to get the condensed object with
    only default fields. Use the expansion together with the ``user.fields``
    parameter to request additional fields and complete the object.

    .. versionadded:: 4.0

    .. versionchanged:: 4.13
        Added ``verified_type`` field

    Attributes
    ----------
    data : dict
        The JSON data representing the user.
    id : int
        The unique identifier of this user.
    name : str
        The name of the user, as they've defined it on their profile. Not
        necessarily a person's name. Typically capped at 50 characters, but
        subject to change.
    username : str
        The Twitter screen name, handle, or alias that this user identifies
        themselves with. Usernames are unique but subject to change. Typically
        a maximum of 15 characters long, but some historical accounts may
        exist with longer names.
    created_at : datetime.datetime | None
        The UTC datetime that the user account was created on Twitter.
    description : str | None
        The text of this user's profile description (also known as bio), if
        the user provided one.
    entities : dict | None
        Contains details about text that has a special meaning in the user's
        description.
    location : str | None
        The location specified in the user's profile, if the user provided
        one. As this is a freeform value, it may not indicate a valid
        location, but it may be fuzzily evaluated when performing searches
        with location queries.
    pinned_tweet_id : int | None
        Unique identifier of this user's pinned Tweet.
    profile_image_url : str | None
        The URL to the profile image for this user, as shown on the user's
        profile.
    protected : bool | None
        Indicates if this user has chosen to protect their Tweets (in other
        words, if this user's Tweets are private).
    public_metrics : dict | None
        Contains details about activity for this user.
    url : str | None
        The URL specified in the user's profile, if present.
    verified : bool | None
        Indicates if this user is a verified Twitter User.
    verified_type : str | None
        Indicates the type of verification a user account has (blue,
        business, government or none).
    withheld : dict | None
        Contains withholding details for `withheld content`_, if applicable.

    References
    ----------
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user

    .. _users lookup: https://developer.twitter.com/en/docs/twitter-api/users/lookup/introduction.html
    .. _withheld content: https://help.twitter.com/en/rules-and-policies/tweet-withheld-by-country
    """

    __slots__ = (
        "data", "id", "name", "username", "created_at", "description",
        "entities", "location", "pinned_tweet_id", "profile_image_url",
        "protected", "public_metrics", "url", "verified", "verified_type",
        "withheld"
    )

    def __init__(self, data):
        self.data = data
        # Required fields; "id" arrives as a string in the API payload
        self.id = int(data["id"])
        self.name = data["name"]
        self.username = data["username"]

        created_at = data.get("created_at")
        self.created_at = (
            parse_datetime(created_at) if created_at is not None else None
        )
        self.description = data.get("description")
        self.entities = data.get("entities")
        self.location = data.get("location")
        pinned_tweet_id = data.get("pinned_tweet_id")
        self.pinned_tweet_id = (
            int(pinned_tweet_id) if pinned_tweet_id is not None else None
        )
        self.profile_image_url = data.get("profile_image_url")
        self.protected = data.get("protected")
        self.public_metrics = data.get("public_metrics")
        self.url = data.get("url")
        self.verified = data.get("verified")
        self.verified_type = data.get("verified_type")
        self.withheld = data.get("withheld")

    def __repr__(self):
        return f"<User id={self.id} name={self.name} username={self.username}>"

    def __str__(self):
        return self.username

View File

@@ -0,0 +1,16 @@
# Tweepy
# Copyright 2010-2023 Joshua Roesslein
# See LICENSE for details.
import datetime
def list_to_csv(item_list):
    """Join the items of *item_list* into a comma-separated string.

    Returns ``None`` when *item_list* is empty or ``None``, so the result
    can be passed straight through as an optional request parameter.
    """
    if not item_list:
        return None
    return ",".join(str(item) for item in item_list)
def parse_datetime(datetime_string):
    """Parse a Twitter API ISO 8601 timestamp into an aware UTC datetime.

    Expects the ``%Y-%m-%dT%H:%M:%S.%f%z`` format the API uses (e.g.
    ``2021-01-01T12:00:00.000Z``).
    """
    parsed = datetime.datetime.strptime(
        datetime_string, "%Y-%m-%dT%H:%M:%S.%f%z"
    )
    # Convert — rather than overwrite — the parsed offset, so that a string
    # carrying a non-UTC offset maps to the correct absolute UTC time.
    # (The previous .replace(tzinfo=utc) silently shifted such times; for
    # the API's usual "Z" suffix this conversion is a no-op.)
    return parsed.astimezone(datetime.timezone.utc)