admin管理员组文章数量:1320647
The code is attached below. It works fine until it gets to ai: df_ai
in the database
dict.
data = pd.read_csv('survey_results_public.csv')
df_demographics = data[['ResponseId', 'MainBranch', 'Age', 'Employment', 'EdLevel', 'YearsCode', 'Country']]
df_learn_code = data[['ResponseId', 'LearnCode']]
df_language = data[['ResponseId', 'LanguageAdmired']]
df_ai = data[['ResponseId', 'AISelect', 'AISent', 'AIAcc', 'AIComplex', 'AIThreat', 'AIBen', 'AIToolCurrently Using']]
database = {'demographics': df_demographics, 'learn_code': df_learn_code, 'language': df_language, 'ai': df_ai}
def find_semicolons(dataframe):
result = []
firstFifty = dataframe.head(50)
for column in firstFifty.columns:
if firstFifty[column].apply(lambda x: ';' in str(x)).any():
result.append(column)
return result
def transform_dataframe(dataframe):
result = find_semicolons(dataframe)
for column in result:
values = [str(x).split(';') for x in dataframe[column].unique().tolist()]
flat_values = []
for x in values:
flat_values.extend(x)
flat_values = set(flat_values)
for x in flat_values:
dataframe[x] = dataframe[column].str.contains(x, na=False).astype(int)
for x in database:
transform_dataframe(database.get(x))
database.get(x).to_csv(x + '.csv')
Here's the traceback
Traceback (most recent call last):
File "/Users/shalim/PycharmProjects/work/stackoverflow.py", line 45, in <module>
transform_dataframe(database.get(x))
File "/Users/shalim/PycharmProjects/work/stackoverflow.py", line 40, in transform_dataframe
dataframe[x] = dataframe[column].str.contains(x, na=False).astype(int)
File "/Users/shalim/PycharmProjects/work/venv/lib/python3.9/site-packages/pandas/core/strings/accessor.py", line 137, in wrapper
return func(self, *args, **kwargs)
File "/Users/shalim/PycharmProjects/work/venv/lib/python3.9/site-packages/pandas/core/strings/accessor.py", line 1327, in contains
if regex and repile(pat).groups:
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/re.py", line 252, in compile
return _compile(pattern, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/re.py", line 304, in _compile
p = sre_compilepile(pattern, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_compile.py", line 764, in compile
p = sre_parse.parse(p, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 948, in parse
p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 443, in _parse_sub
itemsappend(_parse(source, state, verbose, nested + 1,
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 671, in _parse
raise source.error("multiple repeat",
re.error: multiple repeat at position 2
The code is attached below. It works fine until it gets to ai: df_ai
in the database
dict.
data = pd.read_csv('survey_results_public.csv')
df_demographics = data[['ResponseId', 'MainBranch', 'Age', 'Employment', 'EdLevel', 'YearsCode', 'Country']]
df_learn_code = data[['ResponseId', 'LearnCode']]
df_language = data[['ResponseId', 'LanguageAdmired']]
df_ai = data[['ResponseId', 'AISelect', 'AISent', 'AIAcc', 'AIComplex', 'AIThreat', 'AIBen', 'AIToolCurrently Using']]
database = {'demographics': df_demographics, 'learn_code': df_learn_code, 'language': df_language, 'ai': df_ai}
def find_semicolons(dataframe):
result = []
firstFifty = dataframe.head(50)
for column in firstFifty.columns:
if firstFifty[column].apply(lambda x: ';' in str(x)).any():
result.append(column)
return result
def transform_dataframe(dataframe):
result = find_semicolons(dataframe)
for column in result:
values = [str(x).split(';') for x in dataframe[column].unique().tolist()]
flat_values = []
for x in values:
flat_values.extend(x)
flat_values = set(flat_values)
for x in flat_values:
dataframe[x] = dataframe[column].str.contains(x, na=False).astype(int)
for x in database:
transform_dataframe(database.get(x))
database.get(x).to_csv(x + '.csv')
Here's the traceback
Traceback (most recent call last):
File "/Users/shalim/PycharmProjects/work/stackoverflow.py", line 45, in <module>
transform_dataframe(database.get(x))
File "/Users/shalim/PycharmProjects/work/stackoverflow.py", line 40, in transform_dataframe
dataframe[x] = dataframe[column].str.contains(x, na=False).astype(int)
File "/Users/shalim/PycharmProjects/work/venv/lib/python3.9/site-packages/pandas/core/strings/accessor.py", line 137, in wrapper
return func(self, *args, **kwargs)
File "/Users/shalim/PycharmProjects/work/venv/lib/python3.9/site-packages/pandas/core/strings/accessor.py", line 1327, in contains
if regex and repile(pat).groups:
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/re.py", line 252, in compile
return _compile(pattern, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/re.py", line 304, in _compile
p = sre_compilepile(pattern, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_compile.py", line 764, in compile
p = sre_parse.parse(p, flags)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 948, in parse
p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 443, in _parse_sub
itemsappend(_parse(source, state, verbose, nested + 1,
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/sre_parse.py", line 671, in _parse
raise source.error("multiple repeat",
re.error: multiple repeat at position 2
Share
Improve this question
edited Jan 18 at 15:13
m. lekk
asked Jan 18 at 15:08
m. lekkm. lekk
7811 gold badge5 silver badges13 bronze badges
2
- Post traceback error, please – toyota Supra Commented Jan 18 at 15:12
- 1 @toyotaSupra sorry, fot about that. – m. lekk Commented Jan 18 at 15:13
1 Answer
Reset to default 0Pandas .str.contains
performs a regex search rather than a substring search, by default. That means that characters like *
or +
get treated as regex metacharacters instead of a literal asterisk or plus sign.
It looks like you're trying to perform a substring search, not a regex search. Your x
isn't a valid regex, and even if it was, it wouldn't mean what you want. You need to specify regex=False
:
dataframe[x] = dataframe[column].str.contains(x, na=False, regex=False).astype(int)
本文标签:
版权声明:本文标题:python - Why am I getting "raise source.error("multiple repeat", re.error: multiple repeat at positio 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1742067765a2418937.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论